SYS_populate_va (XCC)
author Barret Rhoden <brho@cs.berkeley.edu>
Tue, 18 Feb 2014 00:33:19 +0000 (16:33 -0800)
committer Barret Rhoden <brho@cs.berkeley.edu>
Tue, 18 Feb 2014 03:49:30 +0000 (19:49 -0800)
Syscall for populating a range of virtual addresses.  This will usually
be used by PF handlers on file-backed VMRs, but it can be done for any
sort of user-controlled read-ahead that is desired.

If people want more control, like an FD interface that doesn't bother
with PTEs, let me know.

You need to reinstall your kernel headers.

kern/include/mm.h
kern/include/ros/bits/syscall.h
kern/src/mm.c
kern/src/syscall.c

index 56145a7..f18b260 100644 (file)
@@ -63,6 +63,7 @@ void *do_mmap(struct proc *p, uintptr_t addr, size_t len, int prot, int flags,
 int mprotect(struct proc *p, uintptr_t addr, size_t len, int prot);
 int munmap(struct proc *p, uintptr_t addr, size_t len);
 int handle_page_fault(struct proc *p, uintptr_t va, int prot);
+unsigned long populate_va(struct proc *p, uintptr_t va, unsigned long nr_pgs);
 
 /* These assume the mm_lock is held already */
 int __do_mprotect(struct proc *p, uintptr_t addr, size_t len, int prot);
index 8778da6..7f1569b 100644 (file)
@@ -42,6 +42,7 @@
 #define SYS_change_to_m                                29
 #define SYS_poke_ksched                                30
 #define SYS_abort_sysc                         31
+#define SYS_populate_va                                32
 
 /* FS Syscalls */
 #define SYS_read                               100
index a38155d..f9f21d8 100644 (file)
@@ -1012,6 +1012,56 @@ out:
        return ret;
 }
 
+/* Attempts to populate up to nr_pgs pages starting at va, as if each page had
+ * taken a page fault.  Walks the range one VMR at a time with p->vmr_lock
+ * held, and bails at the first address with no VMR, at a PROT_NONE VMR, or
+ * when a populate helper returns nonzero.
+ *
+ * Returns the number of pages counted as populated.  Pages are only counted
+ * once a whole per-VMR chunk succeeds, so on error the count may be lower
+ * than what was actually filled. */
+unsigned long populate_va(struct proc *p, uintptr_t va, unsigned long nr_pgs)
+{
+       struct vm_region *vmr;
+       unsigned long nr_pgs_this_vmr;
+       unsigned long nr_filled = 0;
+       int pte_prot;
+
+       /* we can screw around with ways to limit the find_vmr calls (can do the
+        * next in line if we didn't unlock, etc.), but i don't expect us to do
+        * this for more than a single VMR in most cases. */
+       spin_lock(&p->vmr_lock);
+       while (nr_pgs) {
+               vmr = find_vmr(p, va);
+               if (!vmr)
+                       break;
+               if (vmr->vm_prot == PROT_NONE)
+                       break;
+               pte_prot = (vmr->vm_prot & PROT_WRITE) ? PTE_USER_RW :
+                          (vmr->vm_prot & (PROT_READ|PROT_EXEC)) ? PTE_USER_RO : 0;
+               nr_pgs_this_vmr = MIN(nr_pgs, (vmr->vm_end - va) >> PGSHIFT);
+               /* less than one whole page left in this VMR (e.g. an unaligned
+                * va): nothing below would advance va or nr_pgs, so bail rather
+                * than spin forever with the lock held. */
+               if (!nr_pgs_this_vmr)
+                       break;
+               if (!vmr->vm_file) {
+                       if (populate_anon_va(p, va, nr_pgs_this_vmr, pte_prot)) {
+                               /* on any error, we can just bail.  we might be underestimating
+                                * nr_filled. */
+                               break;
+                       }
+               } else {
+                       /* need to keep the file alive in case we unlock/block */
+                       kref_get(&vmr->vm_file->f_kref, 1);
+                       /* NOTE(review): the offset is vm_foff - (va - vm_base).  if
+                        * vm_foff is the file offset of vm_base, this presumably wants
+                        * '+' - confirm against populate_pm_va and the PF path. */
+                       if (populate_pm_va(p, va, nr_pgs_this_vmr, pte_prot,
+                                          vmr->vm_file->f_mapping,
+                                          vmr->vm_foff - (va - vmr->vm_base),
+                                          vmr->vm_flags, vmr->vm_prot & PROT_EXEC)) {
+                               /* we might have failed if the underlying file doesn't cover the
+                                * mmap window, depending on how we'll deal with truncation.
+                                * drop the ref we took above before bailing, or it leaks. */
+                               kref_put(&vmr->vm_file->f_kref);
+                               break;
+                       }
+                       /* NOTE(review): if populate_pm_va can drop vmr_lock, vmr may be
+                        * stale by now - confirm it is still safe to deref here. */
+                       kref_put(&vmr->vm_file->f_kref);
+               }
+               nr_filled += nr_pgs_this_vmr;
+               va += nr_pgs_this_vmr << PGSHIFT;
+               nr_pgs -= nr_pgs_this_vmr;
+       }
+       spin_unlock(&p->vmr_lock);
+       return nr_filled;
+}
+
 /* Kernel Dynamic Memory Mappings */
 uintptr_t dyn_vmap_llim = KERN_DYN_TOP;
 spinlock_t dyn_vmap_lock = SPINLOCK_INITIALIZER;
index 2e26ea2..4e19113 100644 (file)
@@ -1011,6 +1011,12 @@ static int sys_abort_sysc(struct proc *p, struct syscall *sysc)
        return abort_sysc(p, sysc);
 }
 
+/* Syscall entry point for populating a range of virtual addresses: aligns va
+ * down to a page boundary and hands the request to populate_va(), passing its
+ * return value straight back. */
+static unsigned long sys_populate_va(struct proc *p, uintptr_t va,
+                                     unsigned long nr_pgs)
+{
+       uintptr_t page_va = ROUNDDOWN(va, PGSIZE);
+
+       return populate_va(p, page_va, nr_pgs);
+}
+
 static intreg_t sys_read(struct proc *p, int fd, void *buf, int len)
 {
        ssize_t ret;
@@ -1763,6 +1769,7 @@ const static struct sys_table_entry syscall_table[] = {
        [SYS_change_to_m] = {(syscall_t)sys_change_to_m, "change_to_m"},
        [SYS_poke_ksched] = {(syscall_t)sys_poke_ksched, "poke_ksched"},
        [SYS_abort_sysc] = {(syscall_t)sys_abort_sysc, "abort_sysc"},
+       [SYS_populate_va] = {(syscall_t)sys_populate_va, "populate_va"},
 
        [SYS_read] = {(syscall_t)sys_read, "read"},
        [SYS_write] = {(syscall_t)sys_write, "write"},