1 /* See COPYRIGHT for copyright information. */
6 #include <inc/string.h>
7 #include <inc/assert.h>
10 #include <kern/kclock.h>
12 // These variables are set by i386_detect_memory()
13 static physaddr_t maxpa; // Maximum physical address
14 size_t npage; // Amount of physical memory (in pages)
15 static size_t basemem; // Amount of base memory (in bytes)
16 static size_t extmem; // Amount of extended memory (in bytes)
18 // These variables are set in i386_vm_init()
19 pde_t* boot_pgdir; // Virtual address of boot time page directory
20 physaddr_t boot_cr3; // Physical address of boot time page directory
21 static char* boot_freemem; // Pointer to next byte of free mem
23 struct Page* pages; // Virtual address of physical page array
24 static struct Page_list page_free_list; // Free list of physical pages
26 // Global descriptor table.
28 // The kernel and user segments are identical (except for the DPL).
29 // To load the SS register, the CPL must equal the DPL. Thus,
30 // we must duplicate the segments for the user and the kernel.
32 struct Segdesc gdt[] =
34 // 0x0 - unused (always faults -- for trapping NULL far pointers)
37 // 0x8 - kernel code segment
38 [GD_KT >> 3] = SEG(STA_X | STA_R, 0x0, 0xffffffff, 0),
40 // 0x10 - kernel data segment
41 [GD_KD >> 3] = SEG(STA_W, 0x0, 0xffffffff, 0),
43 // 0x18 - user code segment
44 [GD_UT >> 3] = SEG(STA_X | STA_R, 0x0, 0xffffffff, 3),
46 // 0x20 - user data segment
47 [GD_UD >> 3] = SEG(STA_W, 0x0, 0xffffffff, 3),
49 // 0x28 - tss, initialized in idt_init()
50 [GD_TSS >> 3] = SEG_NULL
53 struct Pseudodesc gdt_pd = {
54 sizeof(gdt) - 1, (unsigned long) gdt
60 return mc146818_read(r) | (mc146818_read(r + 1) << 8);
64 i386_detect_memory(void)
66 /* For shit BIOS reasons, this isn't seeing any more than 64MB,
67 * explained a little here:
68 * http://exec.h1.ru/docs/os-devel-faq/os-faq-memory.html
71 // CMOS tells us how many kilobytes there are
72 basemem = ROUNDDOWN(nvram_read(NVRAM_BASELO)*1024, PGSIZE);
73 extmem = ROUNDDOWN(nvram_read(NVRAM_EXTLO)*1024, PGSIZE);
75 // Calculate the maximum physical address based on whether
76 // or not there is any extended memory. See comment in <inc/memlayout.h>
78 maxpa = EXTPHYSMEM + extmem;
82 npage = maxpa / PGSIZE;
84 cprintf("Physical memory: %dK available, ", (int)(maxpa/1024));
85 cprintf("base = %dK, extended = %dK\n", (int)(basemem/1024), (int)(extmem/1024));
88 // --------------------------------------------------------------
89 // Set up initial memory mappings and turn on MMU.
90 // --------------------------------------------------------------
92 static void check_boot_pgdir(void);
95 // Allocate n bytes of physical memory aligned on an
96 // align-byte boundary. Align must be a power of two.
97 // Return kernel virtual address. Returned memory is uninitialized.
99 // If we're out of memory, boot_alloc should panic.
100 // This function may ONLY be used during initialization,
101 // before the page_free_list has been set up.
104 boot_alloc(uint32_t n, uint32_t align)
109 // Initialize boot_freemem if this is the first time.
110 // 'end' is a magic symbol automatically generated by the linker,
111 // which points to the end of the kernel's bss segment -
112 // i.e., the first virtual address that the linker
113 // did _not_ assign to any kernel code or global variables.
114 if (boot_freemem == 0)
117 // Step 1: round boot_freemem up to be aligned properly
118 if (((uintptr_t)boot_freemem & align - 1) != 0) {
119 boot_freemem = (char*)((uintptr_t)boot_freemem & ~(align - 1));
120 boot_freemem += align;
122 // Step 2: save current value of boot_freemem as allocated chunk
124 // Step 2.5: check if we can alloc
125 if (PADDR(boot_freemem + n) > maxpa)
126 panic("Out of memory in boot alloc, you fool!\n");
127 // Step 3: increase boot_freemem to record allocation
129 // Step 4: return allocated chunk
134 // Given pgdir, a pointer to a page directory,
135 // walk the 2-level page table structure to find
136 // the page table entry (PTE) for linear address la.
137 // Return a pointer to this PTE.
139 // If the relevant page table doesn't exist in the page directory:
140 // - If create == 0, return 0.
141 // - Otherwise allocate a new page table, install it into pgdir,
142 // and return a pointer into it.
143 // (Questions: What data should the new page table contain?
144 // And what permissions should the new pgdir entry have?
145 // Note that we use the 486-only "WP" feature of %cr0, which
146 // affects the way supervisor-mode writes are checked.)
148 // This function abstracts away the 2-level nature of
149 // the page directory by allocating new page tables
152 // boot_pgdir_walk may ONLY be used during initialization,
153 // before the page_free_list has been set up.
154 // It should panic on failure. (Note that boot_alloc already panics
158 boot_pgdir_walk(pde_t *pgdir, uintptr_t la, int create)
160 pde_t the_pde = pgdir[PDX(la)];
164 return (pte_t*)((pde_t)KADDR(PTE_ADDR(the_pde)) + PTX(la));
167 new_table = boot_alloc(PGSIZE, PGSIZE);
168 memset(new_table, 0, PGSIZE);
169 pgdir[PDX(la)] = (pde_t)PADDR(new_table) | PTE_P | PTE_W;
170 return (pte_t*)((pde_t)KADDR(PTE_ADDR(pgdir[PDX(la)])) + PTX(la));
174 // Map [la, la+size) of linear address space to physical [pa, pa+size)
175 // in the page table rooted at pgdir. Size is a multiple of PGSIZE.
176 // Use permission bits perm|PTE_P for the entries.
178 // This function may ONLY be used during initialization,
179 // before the page_free_list has been set up.
182 boot_map_segment(pde_t *pgdir, uintptr_t la, size_t size, physaddr_t pa, int perm)
186 // Set up a two-level page table:
187 // boot_pgdir is the linear (virtual) address of the root
188 // boot_cr3 is the physical address of the root
189 // Then turn on paging. Then effectively turn off segmentation.
190 // (i.e., the segment base addrs are set to zero).
192 // This function only sets up the kernel part of the address space
193 // (ie. addresses >= UTOP). The user part of the address space
194 // will be setup later.
196 // From UTOP to ULIM, the user is allowed to read but not write.
197 // Above ULIM the user cannot read (or write).
205 //////////////////////////////////////////////////////////////////////
206 // create initial page directory.
207 pgdir = boot_alloc(PGSIZE, PGSIZE);
208 memset(pgdir, 0, PGSIZE);
210 boot_cr3 = PADDR(pgdir);
212 //////////////////////////////////////////////////////////////////////
213 // Recursively insert PD in itself as a page table, to form
214 // a virtual page table at virtual address VPT.
215 // (For now, you don't have to understand the greater purpose of the
216 // following two lines. Unless you are eagle-eyed, in which case you
217 // should already know.)
219 // Permissions: kernel RW, user NONE
220 pgdir[PDX(VPT)] = PADDR(pgdir)|PTE_W|PTE_P;
223 // Permissions: kernel R, user R
224 pgdir[PDX(UVPT)] = PADDR(pgdir)|PTE_U|PTE_P;
226 // Remove this line when you're ready to test this function.
227 panic("i386_vm_init: This function is not finished\n");
229 //////////////////////////////////////////////////////////////////////
230 // Map the kernel stack (symbol name "bootstack"). The complete VA
231 // range of the stack, [KSTACKTOP-PTSIZE, KSTACKTOP), breaks into two
233 // * [KSTACKTOP-KSTKSIZE, KSTACKTOP) -- backed by physical memory
234 // * [KSTACKTOP-PTSIZE, KSTACKTOP-KSTKSIZE) -- not backed => faults
235 // Permissions: kernel RW, user NONE
236 // Your code goes here:
238 //////////////////////////////////////////////////////////////////////
239 // Map all of physical memory at KERNBASE.
240 // Ie. the VA range [KERNBASE, 2^32) should map to
241 // the PA range [0, 2^32 - KERNBASE)
242 // We might not have 2^32 - KERNBASE bytes of physical memory, but
243 // we just set up the mapping anyway.
244 // Permissions: kernel RW, user NONE
245 // Your code goes here:
247 //////////////////////////////////////////////////////////////////////
248 // Make 'pages' point to an array of size 'npage' of 'struct Page'.
249 // The kernel uses this structure to keep track of physical pages;
250 // 'npage' equals the number of physical pages in memory. User-level
251 // programs get read-only access to the array as well.
252 // You must allocate the array yourself.
253 // Map this array read-only by the user at linear address UPAGES
254 // (ie. perm = PTE_U | PTE_P)
256 // - pages -- kernel RW, user NONE
257 // - the read-only version mapped at UPAGES -- kernel R, user R
258 // Your code goes here:
260 // Check that the initial page directory has been set up correctly.
263 //////////////////////////////////////////////////////////////////////
264 // On x86, segmentation maps a VA to a LA (linear addr) and
265 // paging maps the LA to a PA. I.e. VA => LA => PA. If paging is
266 // turned off the LA is used as the PA. Note: there is no way to
267 // turn off segmentation. The closest thing is to set the base
268 // address to 0, so the VA => LA mapping is the identity.
270 // Current mapping: VA KERNBASE+x => PA x.
271 // (segmentation base=-KERNBASE and paging is off)
273 // From here on down we must maintain this VA KERNBASE + x => PA x
274 // mapping, even though we are turning on paging and reconfiguring
277 // Map VA 0:4MB same as VA KERNBASE, i.e. to PA 0:4MB.
278 // (Limits our kernel to <4MB)
279 pgdir[0] = pgdir[PDX(KERNBASE)];
281 // Install page table.
286 cr0 |= CR0_PE|CR0_PG|CR0_AM|CR0_WP|CR0_NE|CR0_TS|CR0_EM|CR0_MP;
287 cr0 &= ~(CR0_TS|CR0_EM);
290 // Current mapping: KERNBASE+x => x => x.
291 // (x < 4MB so uses paging pgdir[0])
293 // Reload all segment registers.
294 asm volatile("lgdt gdt_pd");
295 asm volatile("movw %%ax,%%gs" :: "a" (GD_UD|3));
296 asm volatile("movw %%ax,%%fs" :: "a" (GD_UD|3));
297 asm volatile("movw %%ax,%%es" :: "a" (GD_KD));
298 asm volatile("movw %%ax,%%ds" :: "a" (GD_KD));
299 asm volatile("movw %%ax,%%ss" :: "a" (GD_KD));
300 asm volatile("ljmp %0,$1f\n 1:\n" :: "i" (GD_KT)); // reload cs
301 asm volatile("lldt %%ax" :: "a" (0));
303 // Final mapping: KERNBASE+x => KERNBASE+x => x.
305 // This mapping was only used after paging was turned on but
306 // before the segment registers were reloaded.
309 // Flush the TLB for good measure, to kill the pgdir[0] mapping.
314 // Checks that the kernel part of virtual address space
315 // has been set up roughly correctly (by i386_vm_init()).
317 // This function doesn't test every corner case,
318 // in fact it doesn't test the permission bits at all,
319 // but it is a pretty good sanity check.
321 static physaddr_t check_va2pa(pde_t *pgdir, uintptr_t va);
324 check_boot_pgdir(void)
332 n = ROUNDUP(npage*sizeof(struct Page), PGSIZE);
333 for (i = 0; i < n; i += PGSIZE)
334 assert(check_va2pa(pgdir, UPAGES + i) == PADDR(pages) + i);
338 for (i = 0; KERNBASE + i != 0; i += PGSIZE)
339 assert(check_va2pa(pgdir, KERNBASE + i) == i);
341 // check kernel stack
342 for (i = 0; i < KSTKSIZE; i += PGSIZE)
343 assert(check_va2pa(pgdir, KSTACKTOP - KSTKSIZE + i) == PADDR(bootstack) + i);
345 // check for zero/non-zero in PDEs
346 for (i = 0; i < NPDENTRIES; i++) {
350 case PDX(KSTACKTOP-1):
355 if (i >= PDX(KERNBASE))
358 assert(pgdir[i] == 0);
362 cprintf("check_boot_pgdir() succeeded!\n");
365 // This function returns the physical address of the page containing 'va',
366 // defined by the page directory 'pgdir'. The hardware normally performs
367 // this functionality for us! We define our own version to help check
368 // the check_boot_pgdir() function; it shouldn't be used elsewhere.
371 check_va2pa(pde_t *pgdir, uintptr_t va)
375 pgdir = &pgdir[PDX(va)];
376 if (!(*pgdir & PTE_P))
378 p = (pte_t*) KADDR(PTE_ADDR(*pgdir));
379 if (!(p[PTX(va)] & PTE_P))
381 return PTE_ADDR(p[PTX(va)]);
384 // --------------------------------------------------------------
385 // Tracking of physical pages.
386 // The 'pages' array has one 'struct Page' entry per physical page.
387 // Pages are reference counted, and free pages are kept on a linked list.
388 // --------------------------------------------------------------
391 // Initialize page structure and memory free list.
392 // After this point, ONLY use the functions below
393 // to allocate and deallocate physical memory via the page_free_list,
394 // and NEVER use boot_alloc() or the related boot-time functions above.
399 // The example code here marks all pages as free.
400 // However this is not truly the case. What memory is free?
401 // 1) Mark page 0 as in use.
402 // This way we preserve the real-mode IDT and BIOS structures
403 // in case we ever need them. (Currently we don't, but...)
404 // 2) Mark the rest of base memory as free.
405 // 3) Then comes the IO hole [IOPHYSMEM, EXTPHYSMEM).
406 // Mark it as in use so that it can never be allocated.
407 // 4) Then extended memory [EXTPHYSMEM, ...).
408 // Some of it is in use, some is free. Where is the kernel?
409 // Which pages are used for page tables and other data structures?
411 // Change the code to reflect this.
413 LIST_INIT(&page_free_list);
414 for (i = 0; i < npage; i++) {
416 LIST_INSERT_HEAD(&page_free_list, &pages[i], pp_link);
421 // Initialize a Page structure.
422 // The result has null links and 0 refcount.
423 // Note that the corresponding physical page is NOT initialized!
426 page_initpp(struct Page *pp)
428 memset(pp, 0, sizeof(*pp));
432 // Allocates a physical page.
433 // Does NOT set the contents of the physical page to zero -
434 // the caller must do that if necessary.
436 // *pp_store -- is set to point to the Page struct of the newly allocated
441 // -E_NO_MEM -- otherwise
443 // Hint: use LIST_FIRST, LIST_REMOVE, and page_initpp
444 // Hint: pp_ref should not be incremented
446 page_alloc(struct Page **pp_store)
448 // Fill this function in
453 // Return a page to the free list.
454 // (This function should only be called when pp->pp_ref reaches 0.)
457 page_free(struct Page *pp)
459 // Fill this function in
463 // Decrement the reference count on a page,
464 // freeing it if there are no more refs.
467 page_decref(struct Page* pp)
469 if (--pp->pp_ref == 0)
473 // Given 'pgdir', a pointer to a page directory, pgdir_walk returns
474 // a pointer to the page table entry (PTE) for linear address 'va'.
475 // This requires walking the two-level page table structure.
477 // If the relevant page table doesn't exist in the page directory, then:
478 // - If create == 0, pgdir_walk returns NULL.
479 // - Otherwise, pgdir_walk tries to allocate a new page table
480 // with page_alloc. If this fails, pgdir_walk returns NULL.
481 // - Otherwise, pgdir_walk returns a pointer into the new page table.
483 // This is boot_pgdir_walk, but using page_alloc() instead of boot_alloc().
484 // Unlike boot_pgdir_walk, pgdir_walk can fail.
486 // Hint: you can turn a Page * into the physical address of the
487 // page it refers to with page2pa() from kern/pmap.h.
489 pgdir_walk(pde_t *pgdir, const void *va, int create)
491 // Fill this function in
496 // Map the physical page 'pp' at virtual address 'va'.
497 // The permissions (the low 12 bits) of the page table
498 // entry should be set to 'perm|PTE_P'.
501 // - If there is already a page mapped at 'va', it is page_remove()d.
502 // - If necessary, on demand, allocates a page table and inserts it into
504 // - pp->pp_ref should be incremented if the insertion succeeds.
505 // - The TLB must be invalidated if a page was formerly present at 'va'.
509 // -E_NO_MEM, if page table couldn't be allocated
511 // Hint: The TA solution is implemented using pgdir_walk, page_remove,
515 page_insert(pde_t *pgdir, struct Page *pp, void *va, int perm)
517 // Fill this function in
522 // Return the page mapped at virtual address 'va'.
523 // If pte_store is not zero, then we store in it the address
524 // of the pte for this page. This is used by page_remove
525 // but should not be used by other callers.
527 // Return 0 if there is no page mapped at va.
529 // Hint: the TA solution uses pgdir_walk and pa2page.
532 page_lookup(pde_t *pgdir, void *va, pte_t **pte_store)
534 // Fill this function in
539 // Unmaps the physical page at virtual address 'va'.
540 // If there is no physical page at that address, silently does nothing.
543 // - The ref count on the physical page should decrement.
544 // - The physical page should be freed if the refcount reaches 0.
545 // - The pg table entry corresponding to 'va' should be set to 0.
546 // (if such a PTE exists)
547 // - The TLB must be invalidated if you remove an entry from
548 // the pg dir/pg table.
550 // Hint: The TA solution is implemented using page_lookup,
551 // tlb_invalidate, and page_decref.
554 page_remove(pde_t *pgdir, void *va)
556 // Fill this function in
560 // Invalidate a TLB entry, but only if the page tables being
561 // edited are the ones currently in use by the processor.
564 tlb_invalidate(pde_t *pgdir, void *va)
566 // Flush the entry only if we're modifying the current address space.
567 // For now, there is only one address space, so always invalidate.
574 struct Page *pp, *pp0, *pp1, *pp2;
578 // should be able to allocate three pages
580 assert(page_alloc(&pp0) == 0);
581 assert(page_alloc(&pp1) == 0);
582 assert(page_alloc(&pp2) == 0);
585 assert(pp1 && pp1 != pp0);
586 assert(pp2 && pp2 != pp1 && pp2 != pp0);
588 // temporarily steal the rest of the free pages
590 LIST_INIT(&page_free_list);
592 // should be no free memory
593 assert(page_alloc(&pp) == -E_NO_MEM);
595 // there is no page allocated at address 0
596 assert(page_lookup(boot_pgdir, (void *) 0x0, &ptep) == NULL);
598 // there is no free memory, so we can't allocate a page table
599 assert(page_insert(boot_pgdir, pp1, 0x0, 0) < 0);
601 // free pp0 and try again: pp0 should be used for page table
603 assert(page_insert(boot_pgdir, pp1, 0x0, 0) == 0);
604 assert(PTE_ADDR(boot_pgdir[0]) == page2pa(pp0));
605 assert(check_va2pa(boot_pgdir, 0x0) == page2pa(pp1));
606 assert(pp1->pp_ref == 1);
607 assert(pp0->pp_ref == 1);
609 // should be able to map pp2 at PGSIZE because pp0 is already allocated for page table
610 assert(page_insert(boot_pgdir, pp2, (void*) PGSIZE, 0) == 0);
611 assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp2));
612 assert(pp2->pp_ref == 1);
614 // should be no free memory
615 assert(page_alloc(&pp) == -E_NO_MEM);
617 // should be able to map pp2 at PGSIZE because it's already there
618 assert(page_insert(boot_pgdir, pp2, (void*) PGSIZE, 0) == 0);
619 assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp2));
620 assert(pp2->pp_ref == 1);
622 // pp2 should NOT be on the free list
623 // could happen if ref counts are handled sloppily in page_insert
624 assert(page_alloc(&pp) == -E_NO_MEM);
626 // should not be able to map at PTSIZE because need free page for page table
627 assert(page_insert(boot_pgdir, pp0, (void*) PTSIZE, 0) < 0);
629 // insert pp1 at PGSIZE (replacing pp2)
630 assert(page_insert(boot_pgdir, pp1, (void*) PGSIZE, 0) == 0);
632 // should have pp1 at both 0 and PGSIZE, pp2 nowhere, ...
633 assert(check_va2pa(boot_pgdir, 0) == page2pa(pp1));
634 assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp1));
635 // ... and ref counts should reflect this
636 assert(pp1->pp_ref == 2);
637 assert(pp2->pp_ref == 0);
639 // pp2 should be returned by page_alloc
640 assert(page_alloc(&pp) == 0 && pp == pp2);
642 // unmapping pp1 at 0 should keep pp1 at PGSIZE
643 page_remove(boot_pgdir, 0x0);
644 assert(check_va2pa(boot_pgdir, 0x0) == ~0);
645 assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp1));
646 assert(pp1->pp_ref == 1);
647 assert(pp2->pp_ref == 0);
649 // unmapping pp1 at PGSIZE should free it
650 page_remove(boot_pgdir, (void*) PGSIZE);
651 assert(check_va2pa(boot_pgdir, 0x0) == ~0);
652 assert(check_va2pa(boot_pgdir, PGSIZE) == ~0);
653 assert(pp1->pp_ref == 0);
654 assert(pp2->pp_ref == 0);
656 // so it should be returned by page_alloc
657 assert(page_alloc(&pp) == 0 && pp == pp1);
659 // should be no free memory
660 assert(page_alloc(&pp) == -E_NO_MEM);
662 // forcibly take pp0 back
663 assert(PTE_ADDR(boot_pgdir[0]) == page2pa(pp0));
665 assert(pp0->pp_ref == 1);
668 // give free list back
671 // free the pages we took
676 cprintf("page_check() succeeded!\n");
681 // testing code for boot_pgdir_walk
683 temp = boot_pgdir_walk(pgdir, VPT + (VPT >> 10), 1);
684 cprintf("pgdir = %p\n", pgdir);
685 cprintf("test recursive walking pte_t* = %p\n", temp);
686 cprintf("test recursive walking entry = %p\n", PTE_ADDR(temp));
687 temp = boot_pgdir_walk(pgdir, 0xc0400000, 1);
688 cprintf("LA = 0xc0400000 = %p\n", temp);
689 temp = boot_pgdir_walk(pgdir, 0xc0400070, 1);
690 cprintf("LA = 0xc0400070 = %p\n", temp);
691 temp = boot_pgdir_walk(pgdir, 0xc0800000, 0);
692 cprintf("LA = 0xc0800000, no create = %p\n", temp);
693 temp = boot_pgdir_walk(pgdir, 0xc0600070, 1);
694 cprintf("LA = 0xc0600070 = %p\n", temp);
695 temp = boot_pgdir_walk(pgdir, 0xc0600090, 0);
696 cprintf("LA = 0xc0600090, nc = %p\n", temp);
697 temp = boot_pgdir_walk(pgdir, 0xc0608070, 0);
698 cprintf("LA = 0xc0608070, nc = %p\n", temp);
699 temp = boot_pgdir_walk(pgdir, 0xc0800070, 1);
700 cprintf("LA = 0xc0800070 = %p\n", temp);
701 temp = boot_pgdir_walk(pgdir, 0xc0b00070, 0);
702 cprintf("LA = 0xc0b00070, nc = %p\n", temp);
703 temp = boot_pgdir_walk(pgdir, 0xc0c00000, 0);
704 cprintf("LA = 0xc0c00000, nc = %p\n", temp);