akaros/kern/src/pmap.c
/* Copyright (c) 2009,13 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * Arch independent physical memory and page table management.
 *
 * For page allocation, check out the family of page_alloc files. */

#include <arch/arch.h>
#include <arch/mmu.h>

#include <error.h>

#include <kmalloc.h>
#include <atomic.h>
#include <string.h>
#include <assert.h>
#include <pmap.h>
#include <process.h>
#include <stdio.h>
#include <mm.h>
#include <multiboot.h>
#include <arena.h>
#include <init.h>

physaddr_t max_pmem = 0;  /* Total amount of physical memory (bytes) */
physaddr_t max_paddr = 0; /* Maximum addressable physical address */
size_t max_nr_pages = 0;  /* Number of addressable physical memory pages */
struct page *pages = 0;
struct multiboot_info *multiboot_kaddr = 0;
uintptr_t boot_freemem = 0;
uintptr_t boot_freelimit = 0;

static size_t sizeof_mboot_mmentry(struct multiboot_mmap_entry *entry)
{
        /* Careful - len is a uint64 (need to cast down for 32 bit) */
        return (size_t)(entry->len);
}

static void adjust_max_pmem(struct multiboot_mmap_entry *entry, void *data)
{
        if (entry->type != MULTIBOOT_MEMORY_AVAILABLE)
                return;
        /* Careful - addr + len is a uint64 (need to cast down for 32 bit) */
        max_pmem = MAX(max_pmem, (size_t)(entry->addr + entry->len));
}

static void kpages_arena_init(void)
{
        void *kpages_pg;

        kpages_pg = arena_alloc(base_arena, PGSIZE, MEM_WAIT);
        kpages_arena = arena_builder(kpages_pg, "kpages", PGSIZE, arena_alloc,
                                     arena_free, base_arena, 8 * PGSIZE);
}

/**
 * @brief Initializes physical memory.  Determines the pmem layout, sets up the
 * base and kpages arenas, and turns on virtual memory/page tables.
 *
 * Regarding max_pmem vs max_paddr and max_nr_pages: max_pmem is the largest
 * physical address that is in a FREE region.  It includes RESERVED regions that
 * are below this point.  max_paddr is the largest physical address, <=
 * max_pmem, that the KERNBASE mapping can map.  It too may include reserved
 * ranges.  The 'pages' array will track all physical pages up to max_paddr.
 * There are max_nr_pages of them.  On 64 bit systems, max_pmem == max_paddr. */
void pmem_init(struct multiboot_info *mbi)
{
        mboot_detect_memory(mbi);
        mboot_print_mmap(mbi);
        /* adjust the max memory based on the mmaps, since the old detection
         * doesn't help much on 64 bit systems */
        mboot_foreach_mmap(mbi, adjust_max_pmem, 0);
        /* KERN_VMAP_TOP - KERNBASE is the max amount of virtual addresses we
         * can use for the physical memory mapping (aka - the KERNBASE mapping).
         * Shouldn't be an issue on 64b, but usually is for 32 bit. */
        max_paddr = MIN(max_pmem, KERN_VMAP_TOP - KERNBASE);
        /* Note not all of this memory is free. */
        max_nr_pages = max_paddr / PGSIZE;
        printk("Max physical RAM (appx, bytes): %lu\n", max_pmem);
        printk("Max addressable physical RAM (appx): %lu\n", max_paddr);
        printk("Highest page number (including reserved): %lu\n", max_nr_pages);
        /* We should init the page structs, but zeroing happens to work, except
         * for the sems.  Those are init'd by the page cache before they are
         * used. */
        pages = (struct page*)boot_zalloc(max_nr_pages * sizeof(struct page),
                                          PGSIZE);
        base_arena_init(mbi);
        /* kpages will use some of the basic slab caches.  kmem_cache_init needs
         * to not do memory allocations (which it doesn't, and it can
         * base_alloc()). */
        kmem_cache_init();
        kpages_arena_init();
        printk("Base arena total mem: %lu\n", arena_amt_total(base_arena));
        vm_init();

        static_assert(PROCINFO_NUM_PAGES*PGSIZE <= PTSIZE);
        static_assert(PROCDATA_NUM_PAGES*PGSIZE <= PTSIZE);
}
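
/* Illustrative numbers for the calculation above (assumed, not taken from any
 * particular machine): on a 32 bit build with 2 GB of RAM but only a 1 GB
 * KERNBASE mapping (KERN_VMAP_TOP - KERNBASE == 0x40000000), max_pmem would be
 * 0x80000000, max_paddr would be clamped to 0x40000000, and max_nr_pages would
 * be 0x40000000 / PGSIZE == 262144 with 4 KB pages.  On 64 bit, the MIN() does
 * not clamp and max_paddr == max_pmem. */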

static void set_largest_freezone(struct multiboot_mmap_entry *entry, void *data)
{
        struct multiboot_mmap_entry **boot_zone =
               (struct multiboot_mmap_entry**)data;

        if (entry->type != MULTIBOOT_MEMORY_AVAILABLE)
                return;
        if (!*boot_zone || (sizeof_mboot_mmentry(entry) >
                           sizeof_mboot_mmentry(*boot_zone)))
                *boot_zone = entry;
}

/* Initialize boot freemem and its limit.
 *
 * "end" is a symbol marking the end of the kernel.  This covers anything linked
 * in with the kernel (KFS, etc).  However, 'end' is a kernel load address,
 * which differs from kernbase addrs in 64 bit.  We need to use the kernbase
 * mapping for anything dynamic (because it could go beyond 1 GB).
 *
 * Ideally, we'll use the largest mmap zone, as reported by multiboot.  If we
 * don't have one (riscv), we'll just use the memory after the kernel.
 *
 * If we do have a zone, there is a chance we've already used some of it (for
 * the kernel, etc).  We'll use the lowest address in the zone that is
 * greater than "end" (and adjust the limit accordingly).  */
static void boot_alloc_init(void)
{
        extern char end[];
        uintptr_t boot_zone_start, boot_zone_end;
        uintptr_t end_kva = (uintptr_t)KBASEADDR(end);
        struct multiboot_mmap_entry *boot_zone = 0;

        /* Find our largest mmap_entry; that will set boot_zone */
        mboot_foreach_mmap(multiboot_kaddr, set_largest_freezone, &boot_zone);
        if (boot_zone) {
                boot_zone_start = (uintptr_t)KADDR(boot_zone->addr);
                /* one issue for 32b is that the boot_zone_end could be beyond
                 * max_paddr and even wrap-around.  Do the min check as a
                 * uint64_t.  The result should be a safe, unwrapped 32/64b when
                 * cast to physaddr_t. */
                boot_zone_end = (uintptr_t)KADDR(MIN(boot_zone->addr +
                                                     boot_zone->len,
                                                     (uint64_t)max_paddr));
                /* using KERNBASE (kva, btw) which covers the kernel and
                 * anything before it (like the stuff below EXTPHYSMEM on x86)
                 */
                if (regions_collide_unsafe(KERNBASE, end_kva,
                                           boot_zone_start, boot_zone_end))
                        boot_freemem = end_kva;
                else
                        boot_freemem = boot_zone_start;
                boot_freelimit = boot_zone_end;
        } else {
                boot_freemem = end_kva;
                boot_freelimit = max_paddr + KERNBASE;
        }
        printd("boot_zone: %p, paddr base: 0x%llx, paddr len: 0x%llx\n",
               boot_zone, boot_zone ? boot_zone->addr : 0,
               boot_zone ? boot_zone->len : 0);
        printd("boot_freemem: %p, boot_freelimit %p\n", boot_freemem,
               boot_freelimit);
}

/* Low-level allocator, used before page_alloc is on.  Returns 'amt' bytes,
 * aligned to 'align' (should be a power of 2).  Retval is a kernbase addr.
 * Will panic on failure. */
void *boot_alloc(size_t amt, size_t align)
{
        uintptr_t retval;

        if (!boot_freemem)
                boot_alloc_init();
        boot_freemem = ROUNDUP(boot_freemem, align);
        retval = boot_freemem;
        if (boot_freemem + amt > boot_freelimit) {
                printk("boot_alloc: boot_freemem is %p\n", boot_freemem);
                printk("boot_alloc: amt is %lu\n", amt);
                printk("boot_freelimit is %p\n", boot_freelimit);
                printk("boot_freemem + amt is > boot_freelimit\n");
                panic("Out of memory in boot alloc, you fool!\n");
        }
        boot_freemem += amt;
        printd("boot alloc from %p to %p\n", retval, boot_freemem);
        /* multiboot info probably won't ever conflict with our boot alloc */
        if (mboot_region_collides(multiboot_kaddr, retval, boot_freemem))
                panic("boot allocation could clobber multiboot info!");
        return (void*)retval;
}

void *boot_zalloc(size_t amt, size_t align)
{
        /* boot_alloc panics on failure */
        void *v = boot_alloc(amt, align);
        memset(v, 0, amt);
        return v;
}
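
/* Example (sketch): early-boot code that needs a zeroed, page-aligned table
 * before the page allocator is up can call boot_zalloc() directly, the same
 * way pmem_init() sizes the 'pages' array.  'struct foo' and 'nr_foos' below
 * are hypothetical:
 *
 *        struct foo *foo_table;
 *
 *        foo_table = boot_zalloc(nr_foos * sizeof(struct foo), PGSIZE);
 *
 * There is no error to check: boot_alloc()/boot_zalloc() panic rather than
 * return NULL, and there is no corresponding free. */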

/**
 * @brief Map the physical page 'page' into the virtual address 'va' in page
 *        directory 'pgdir'
 *
 * Map the physical page 'page' at virtual address 'va'.
 * The permissions (the low 12 bits) of the page table
 * entry should be set to 'perm|PTE_P'.
 *
 * Details:
 *   - There must not already be a page mapped at 'va'; mapping over an
 *     existing page is no longer supported (note the assert below).
 *   - If necessary, on demand, allocates a page table and inserts it into
 *     'pgdir'.
 *   - This saves your refcnt in the pgdir (refcnts going away soon).
 *   - The TLB must be invalidated if a page was formerly present at 'va'.
 *     (this is handled in page_remove)
 *
 * No support for jumbos here.  We will need to be careful when trying to
 * insert regular pages into something that was already jumbo.  We will
 * also need to be careful with our overloading of the PTE_PS and
 * PTE_PAT flags...
 *
 * @param[in] pgdir the page directory to insert the page into
 * @param[in] page  a pointer to the page struct representing the
 *                  physical page that should be inserted.
 * @param[in] va    the virtual address where the page should be
 *                  inserted.
 * @param[in] perm  the permission bits with which to set up the
 *                  virtual mapping.
 *
 * @return ESUCCESS  on success
 * @return -ENOMEM   if a page table could not be allocated
 *                   into which the page should be inserted
 *
 */
int page_insert(pgdir_t pgdir, struct page *page, void *va, int perm)
{
        pte_t pte = pgdir_walk(pgdir, va, 1);

        if (!pte_walk_okay(pte))
                return -ENOMEM;
        /* Leftover from older times, but we no longer support this: */
        assert(!pte_is_mapped(pte));
        pte_write(pte, page2pa(page), perm);
        return 0;
}
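
/* Example (sketch) of the intended calling pattern.  kpage_alloc() and
 * PTE_USER_RW are assumed names for the page allocator and permission bits;
 * see the page_alloc files and the arch mmu headers for the real ones.
 *
 *        struct page *pg;
 *
 *        if (kpage_alloc(&pg))
 *                return -ENOMEM;
 *        if (page_insert(pgdir, pg, va, PTE_USER_RW)) {
 *                page_decref(pg);
 *                return -ENOMEM;
 *        }
 *
 * Since page_insert() asserts nothing is mapped at 'va', callers replacing an
 * existing mapping must page_remove() it first. */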

/**
 * @brief Return the page mapped at virtual address 'va' in
 * page directory 'pgdir'.
 *
 * If pte_store is not NULL, then we store in it the address
 * of the pte for this page.  This is used by page_remove
 * but should not be used by other callers.
 *
 * For jumbos, right now this returns the first Page* in the 4MB range
 *
 * @param[in]  pgdir     the page directory from which we should do the lookup
 * @param[in]  va        the virtual address of the page we are looking up
 * @param[out] pte_store the address of the page table entry for the returned
 *                       page
 *
 * @return PAGE the page mapped at virtual address 'va'
 * @return NULL No mapping exists at virtual address 'va', or it's paged out
 */
page_t *page_lookup(pgdir_t pgdir, void *va, pte_t *pte_store)
{
        pte_t pte = pgdir_walk(pgdir, va, 0);

        if (!pte_walk_okay(pte) || !pte_is_mapped(pte))
                return 0;
        if (pte_store)
                *pte_store = pte;
        return pa2page(pte_get_paddr(pte));
}
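
/* Example (sketch): the common lookup ignores the PTE; only page_remove()
 * style code should need pte_store.
 *
 *        struct page *pg = page_lookup(pgdir, va, NULL);
 *
 *        if (!pg)
 *                return;        // no mapping, or it is paged out
 *
 * When pg is non-NULL, page2pa(pg) is the physical address backing 'va'. */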

/**
 * @brief Unmaps the physical page at virtual address 'va' in page directory
 * 'pgdir'.
 *
 * If there is no physical page at that address, this function silently
 * does nothing.
 *
 * Details:
 *   - The ref count on the physical page is decremented when the page is
 *     removed.
 *   - The physical page is freed if the refcount reaches 0.
 *   - The pg table entry corresponding to 'va' is set to 0.
 *     (if such a PTE exists)
 *   - The TLB is invalidated if an entry is removed from the pg dir/pg table.
 *
 * This may be wonky wrt Jumbo pages and decref.
 *
 * @param pgdir the page directory from which the page should be removed
 * @param va    the virtual address at which the page we are trying to
 *              remove is mapped
 * TODO: consider deprecating this, or at least changing how it works with TLBs.
 * Might want to have the caller need to manage the TLB.  Also note it is used
 * in env_user_mem_free, minus the walk. */
void page_remove(pgdir_t pgdir, void *va)
{
        pte_t pte;
        page_t *page;

        pte = pgdir_walk(pgdir, va, 0);
        if (!pte_walk_okay(pte) || pte_is_unmapped(pte))
                return;

        if (pte_is_mapped(pte)) {
                /* TODO: (TLB) need to do a shootdown, inval sucks.  And might
                 * want to manage the TLB / free pages differently. (like by the
                 * caller).  Careful about the proc/memory lock here. */
                page = pa2page(pte_get_paddr(pte));
                pte_clear(pte);
                tlb_invalidate(pgdir, va);
                page_decref(page);
        } else if (pte_is_paged_out(pte)) {
                /* TODO: (SWAP) need to free this from the swap */
                panic("Swapping not supported!");
                pte_clear(pte);
        }
}
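
/* Example (sketch): tearing down a mapping made with page_insert().  The TLB
 * invalidation and the page_decref() happen inside page_remove():
 *
 *        page_remove(pgdir, va);
 *
 * As the TODO notes, this currently only invalidates the local core's TLB. */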

/**
 * @brief Invalidate a TLB entry, but only if the page tables being
 * edited are the ones currently in use by the processor.
 *
 * TODO: (TLB) Need to sort this for cross core lovin'
 *
 * @param pgdir the page directory associated with the tlb entry
 *              we are trying to invalidate
 * @param va    the virtual address associated with the tlb entry
 *              we are trying to invalidate
 */
void tlb_invalidate(pgdir_t pgdir, void *va)
{
        // Flush the entry only if we're modifying the current address space.
        // For now, there is only one address space, so always invalidate.
        invlpg(va);
}

static void __tlb_global(uint32_t srcid, long a0, long a1, long a2)
{
        tlb_flush_global();
}

/* Does a global TLB flush on all cores. */
void tlb_shootdown_global(void)
{
        tlb_flush_global();
        if (booting)
                return;
        /* TODO: consider a helper for broadcast messages, though note that
         * we're doing our flush immediately, which our caller expects from us
         * before it returns. */
        for (int i = 0; i < num_cores; i++) {
                if (i == core_id())
                        continue;
                send_kernel_message(i, __tlb_global, 0, 0, 0, KMSG_IMMEDIATE);
        }
}

/* Helper, returns true if any part of (start1, end1) is within (start2, end2).
 * Equality of endpoints (like end1 == start2) is okay.
 * Assumes no wrap-around. */
bool regions_collide_unsafe(uintptr_t start1, uintptr_t end1,
                            uintptr_t start2, uintptr_t end2)
{
        if (start1 <= start2) {
                if (end1 <= start2)
                        return FALSE;
                return TRUE;
        } else {
                if (end2 <= start1)
                        return FALSE;
                return TRUE;
        }
}

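/* For example, adjacent regions do not collide, since equal endpoints are
 * okay: regions_collide_unsafe(0x1000, 0x2000, 0x2000, 0x3000) is FALSE, while
 * regions_collide_unsafe(0x1000, 0x2001, 0x2000, 0x3000) is TRUE.  The
 * "unsafe" refers to the lack of wrap-around handling noted above, not to
 * locking. */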