Removed the old pfault, fork() uses VMRs
authorBarret Rhoden <brho@cs.berkeley.edu>
Wed, 7 Jul 2010 22:26:05 +0000 (15:26 -0700)
committerKevin Klues <klueska@cs.berkeley.edu>
Thu, 3 Nov 2011 00:35:48 +0000 (17:35 -0700)
This gets rid of the last bits of the old per-PTE memory region
management.  Lots of gotchas still, mostly related to file-backed memory
and swapped-out pages.

kern/arch/i686/mmu.h
kern/arch/sparc/mmu.h
kern/include/mm.h
kern/include/ros/memlayout.h
kern/src/env.c
kern/src/mm.c
kern/src/pmap.c
kern/src/syscall.c
tests/fork.c [new file with mode: 0644]

index e4a84e4..3e224c1 100644 (file)
@@ -286,13 +286,6 @@ extern pseudodesc_t gdt_pd;
 #define PAGE_UNMAPPED(pte) ((pte) == 0)
 #define PAGE_PAGED_OUT(pte) (!PAGE_PRESENT(pte) && !PAGE_UNMAPPED(pte))
 
-// get the pfault_info pointer stored in this PTE.
-// useless unless PAGE_PAGED_OUT(pte).
-#define PTE2PFAULT_INFO(pte) ((struct pfault_info*)pte)
-// convert a pfault_info pointer to a PTE.
-// assumes the pointer is 4-byte aligned.
-#define PFAULT_INFO2PTE(ptr) ((pte_t)ptr)
-
 #endif /* !__ASSEMBLER__ */
 
 #endif /* !ROS_ARCH_MMU_H */
index 4a67804..69a31f2 100644 (file)
 #define PAGE_UNMAPPED(pte) ((pte) == 0)
 #define PAGE_PAGED_OUT(pte) (!PAGE_PRESENT(pte) && !PAGE_UNMAPPED(pte))
 
-// get the pfault_info pointer stored in this PTE.
-// useless unless PAGE_PAGED_OUT(pte).
-#define PTE2PFAULT_INFO(pte) ((struct pfault_info*)pte)
-// convert a pfault_info pointer to a PTE.
-// assumes the pointer is 4-byte aligned.
-#define PFAULT_INFO2PTE(ptr) ((pte_t)ptr)
-
 #endif /* !ROS_INC_MMU_H */
index 212af92..4aec314 100644 (file)
@@ -89,23 +89,9 @@ void destroy_vmr(struct vm_region *vmr);
 struct vm_region *find_vmr(struct proc *p, uintptr_t va);
 struct vm_region *find_first_vmr(struct proc *p, uintptr_t va);
 void isolate_vmrs(struct proc *p, uintptr_t va, size_t len);
+void duplicate_vmrs(struct proc *p, struct proc *new_p);
 void print_vmrs(struct proc *p);
 
-// at least for now, we aren't using vm regions. we're storing pointers
-// to pfault_info_t inside the PTEs in an arch-specific way.
-typedef struct pfault_info {
-       struct file* file; // or NULL for zero-fill
-       size_t pgoff; // offset into file
-       size_t read_len; // amount of file to read into this page (zero-fill rest)
-       int prot;
-} pfault_info_t;
-
-void mmap_init(void);
-
-pfault_info_t* pfault_info_alloc(struct file* file);
-void pfault_info_free(pfault_info_t* pfi);
-
-
 /* mmap() related functions.  These manipulate VMRs and change the hardware page
  * tables. */
 void *mmap(struct proc *p, uintptr_t addr, size_t len, int prot, int flags,
index 7bce867..5b6854d 100644 (file)
@@ -43,7 +43,7 @@
  *                     |                              | RW/RW            PTSIZE
  *                     |     Per-Process R/W Data     |                   |
  *    UDATA     ---->  +------------------------------+ 0xbec00000      --+
- *                     |    Global Shared R/W Data    | RW/RW  PGSIZE
+ *    UMAPTOP,         |    Global Shared R/W Data    | RW/RW  PGSIZE
  * UXSTACKTOP,UGDATA ->+------------------------------+ 0xbebff000
  *                     |     User Exception Stack     | RW/RW  PGSIZE
  *                     +------------------------------+ 0xbebfe000
index 45ea809..a13ecef 100644 (file)
@@ -278,10 +278,9 @@ void env_user_mem_free(env_t* e, void* start, size_t len)
                        page_t* page = ppn2page(PTE2PPN(*pte));
                        *pte = 0;
                        page_decref(page);
-               }
-               else // PAGE_PAGED_OUT(*pte)
-               {
-                       pfault_info_free(PTE2PFAULT_INFO(*pte));
+               } else {
+                       assert(PAGE_PAGED_OUT(*pte));
+                       /* TODO: (SWAP) deal with this */
                        *pte = 0;
                }
                return 0;
index 7622ecc..758103e 100644 (file)
 #include <vfs.h>
 
 struct kmem_cache *vmr_kcache;
-struct kmem_cache *pfault_info_cache;
 
 void vmr_init(void)
 {
        vmr_kcache = kmem_cache_create("vm_regions", sizeof(struct vm_region),
                                       __alignof__(struct dentry), 0, 0, 0);
-       pfault_info_cache = kmem_cache_create("pfault_info",
-                                             sizeof(pfault_info_t), 8, 0, 0, 0);
 }
 
 /* For now, the caller will set the prot, flags, file, and offset.  In the
@@ -56,6 +53,8 @@ struct vm_region *create_vmr(struct proc *p, uintptr_t va, size_t len)
        vm_i = TAILQ_FIRST(&p->vm_regions);
        if (!vm_i || (va + len < vm_i->vm_base)) {
                vmr = kmem_cache_alloc(vmr_kcache, 0);
+               if (!vmr)
+                       panic("EOM!");
                vmr->vm_base = va;
                TAILQ_INSERT_HEAD(&p->vm_regions, vmr, vm_link);
        } else {
@@ -213,6 +212,32 @@ void isolate_vmrs(struct proc *p, uintptr_t va, size_t len)
                split_vmr(vmr, va + len);
 }
 
+/* This will make new_p have the same VMRs as p, though it does nothing to
+ * ensure the physical pages or whatever are shared/mapped/copied/whatever.
+ * This is used by fork().
+ *
+ * Note that if you are working on a VMR that is a file, you'll want to be
+ * careful about how it is mapped (SHARED, PRIVATE, etc). */
+void duplicate_vmrs(struct proc *p, struct proc *new_p)
+{
+       struct vm_region *vmr, *vm_i;
+       TAILQ_FOREACH(vm_i, &p->vm_regions, vm_link) {
+               vmr = kmem_cache_alloc(vmr_kcache, 0);
+               if (!vmr)
+                       panic("EOM!");
+               vmr->vm_proc = new_p;
+               vmr->vm_base = vm_i->vm_base;
+               vmr->vm_end = vm_i->vm_end;
+               vmr->vm_perm = vm_i->vm_perm;   
+               vmr->vm_flags = vm_i->vm_flags; 
+               vmr->vm_file = vm_i->vm_file;
+               if (vmr->vm_file)
+                       atomic_inc(&vmr->vm_file->f_refcnt);
+               vmr->vm_foff = vm_i->vm_foff;
+               TAILQ_INSERT_TAIL(&new_p->vm_regions, vmr, vm_link);
+       }
+}
+
 void print_vmrs(struct proc *p)
 {
        int count = 0;
@@ -415,6 +440,7 @@ int __do_munmap(struct proc *p, uintptr_t addr, size_t len)
                                 * PAGED_OUT is also being used to mean "hasn't been mapped
                                 * yet".  Note we now allow PAGE_UNMAPPED, unlike older
                                 * versions of mmap(). */
+                               *pte = 0;
                        }
                }
                next_vmr = TAILQ_NEXT(vmr, vm_link);
@@ -506,17 +532,3 @@ int __handle_page_fault(struct proc* p, uintptr_t va, int prot)
        *ppte = PTE(page2ppn(a_page), PTE_P | pte_perm);
        return 0;
 }
-
-pfault_info_t* pfault_info_alloc(struct file* file)
-{
-       if(file)
-               file_incref(file);
-       return kmem_cache_alloc(pfault_info_cache,0);
-}
-
-void pfault_info_free(pfault_info_t* pfi)
-{
-       if(pfi->file)
-               file_decref(pfi->file);
-       kmem_cache_free(pfault_info_cache,pfi);
-}
index 0017ea7..60cceff 100644 (file)
@@ -213,25 +213,30 @@ page_t *page_lookup(pde_t *pgdir, void *va, pte_t **pte_store)
  * @param pgdir the page directory from which the page should be removed
  * @param va    the virtual address at which the page we are trying to
  *              remove is mapped
+ * TODO: consider deprecating this, esp given issues with TLB management.  might
+ * want to have the caller need to manage the TLB.  Also note it is used in
+ * env_user_mem_free, minus the walk.
  */
 void page_remove(pde_t *pgdir, void *va)
 {
-       pte_t pte;
+       pte_t *pte;
        page_t *page;
 
        pte = pgdir_walk(pgdir,va,0);
-       if(!pte || PAGE_UNMAPPED(*pte))
+       if (!pte || PAGE_UNMAPPED(*pte))
                return;
 
-       if(PAGE_PAGED_OUT(*pte))
-               pfault_info_free(PTE2PFAULT_INFO(*pte));
-       else // PAGE_PRESENT
-       {
+       if (PAGE_PRESENT(*pte)) {
+               /* TODO: (TLB) race here, where the page can be given out before
+                * the shootdown happened.  Need to put it on a temp list. */
+               page = ppn2page(PTE2PPN(*pte));
+               *pte = 0;
+               page_decref(page);
                tlb_invalidate(pgdir, va);
-               page_decref(ppn2page(PTE2PPN(*pte)));
+       } else if (PAGE_PAGED_OUT(*pte)) {
+               /* TODO: (SWAP) need to free this from the swap */
+               *pte = 0;
        }
-
-       *pte = 0;
 }
 
 /**
index 0a6a4cb..457ec0b 100644 (file)
@@ -336,6 +336,8 @@ static ssize_t sys_fork(env_t* e)
                if(GET_BITMASK_BIT(e->cache_colors_map,i))
                        cache_color_alloc(llc_cache, env->cache_colors_map);
 
+       duplicate_vmrs(e, env);
+
        int copy_page(env_t* e, pte_t* pte, void* va, void* arg)
        {
                env_t* env = (env_t*)arg;
@@ -352,22 +354,16 @@ static ssize_t sys_fork(env_t* e)
                        }
 
                        pagecopy(page2kva(pp),ppn2kva(PTE2PPN(*pte)));
-               }
-               else // PAGE_PAGED_OUT(*pte)
-               {
+               } else {
+                       assert(PAGE_PAGED_OUT(*pte));
+                       /* TODO: (SWAP) will need to either make a copy or CoW/refcnt the
+                        * backend store.  For now, this PTE will be the same as the
+                        * original PTE */
                        pte_t* newpte = pgdir_walk(env->env_pgdir,va,1);
                        if(!newpte)
                                return -1;
-
-                       struct file* file = PTE2PFAULT_INFO(*pte)->file;
-                       pfault_info_t* newpfi = pfault_info_alloc(file);
-                       if(!newpfi)
-                               return -1;
-
-                       *newpfi = *PTE2PFAULT_INFO(*pte);
-                       *newpte = PFAULT_INFO2PTE(newpfi);
+                       *newpte = *pte;
                }
-
                return 0;
        }
 
@@ -378,9 +374,9 @@ static ssize_t sys_fork(env_t* e)
        env->procinfo->pid = env->pid;
        env->procinfo->ppid = env->ppid;
 
-       // copy all memory below procdata
-       if(env_user_mem_walk(e,0,UDATA,&copy_page,env))
-       {
+       /* for now, just copy the contents of every present page in the entire
+        * address space. */
+       if (env_user_mem_walk(e, 0, UMAPTOP, &copy_page, env)) {
                proc_decref(env,2);
                set_errno(current_tf,ENOMEM);
                return -1;
diff --git a/tests/fork.c b/tests/fork.c
new file mode 100644 (file)
index 0000000..b1bd6d8
--- /dev/null
@@ -0,0 +1,14 @@
+#include <stdlib.h>
+#include <rstdio.h>
+#include <unistd.h>
+
+int main(int argc, char** argv)
+{
+       pid_t pid = 0;
+       pid = fork();
+       if (pid)
+               printf("Hello world from parent!!\n");
+       else 
+               printf("Hello world from child!!\n");
+       return 0;
+}