Sped up fork/exec significantly
[akaros.git] / kern / src / syscall.c
index cb86c8f..47c746f 100644 (file)
@@ -23,6 +23,8 @@
 #include <kmalloc.h>
 #include <stdio.h>
 #include <resource.h>
+#include <colored_caches.h>
+#include <arch/bitmask.h>
 #include <kfs.h> // eventually replace this with vfs.h
 
 #ifdef __sparc_v8__
@@ -80,21 +82,153 @@ static ssize_t sys_serial_read(env_t* e, char *DANGEROUS _buf, size_t len)
 /* START OF REMOTE SYSTEMCALL SUPPORT SYSCALLS. THESE WILL GO AWAY AS THINGS MATURE */
 //
 
-static ssize_t sys_run_binary(env_t* e, void *DANGEROUS binary_buf,
-                              void*DANGEROUS arg, size_t len) {
-       uint8_t *CT(len) checked_binary_buf;
-       checked_binary_buf = user_mem_assert(e, binary_buf, len, PTE_USER_RO);
+/* Fork the calling process.
+ *
+ * Creates a new process, copies the parent's heap bounds and the current
+ * trapframe into it, eagerly copies every mapped page in [0,heap_top) and
+ * [USTACKBOT,UTOP), and schedules the child.
+ *
+ * Returns the child's pid to the caller, or -1 if the caller is not in
+ * PROC_RUNNING_S (only single-core processes are supported).
+ *
+ * The child's reference is intentionally not dropped here; that happens
+ * when the parent waits for it. */
+static ssize_t sys_fork(env_t* e)
+{
+       // TODO: right now we only support fork for single-core processes
+       if(e->state != PROC_RUNNING_S)
+               return -1;
+
+       env_t* env = proc_create(NULL,0);
+       assert(env != NULL);
+
+       env->heap_bottom = e->heap_bottom;
+       env->heap_top = e->heap_top;
+       env->ppid = e->pid;
+       env->env_tf = *current_tf;
+
+       // one cache_color_alloc call for every color bit set in the parent's
+       // map (the index i itself is not passed to the allocator)
+       env->cache_colors_map = cache_colors_map_alloc();
+       for(int i=0; i < llc_cache->num_colors; i++)
+               if(GET_BITMASK_BIT(e->cache_colors_map,i))
+                       cache_color_alloc(llc_cache, env->cache_colors_map);
+
+       // copy page table and page contents.
+       // TODO: does not work with mmap.  only text, heap, stack are copied.
+       for(char* va = 0; va < (char*)UTOP; va += PGSIZE)
+       {
+               // copy [0,heaptop) and [stackbot,utop).  the skip uses a range
+               // test rather than ==, because sys_brk() stores heap_top without
+               // page-aligning it and an == test would then never fire.
+               if(va >= (char*)env->heap_top && va < (char*)USTACKBOT)
+                       va = (char*)USTACKBOT;
+
+               int perms = get_va_perms(e->env_pgdir,va);
+               if(perms)
+               {
+                       page_t* pp;
+                       error_t err;
+
+                       // keep the calls outside assert() so the work is still
+                       // done if assertions are ever compiled out
+                       err = upage_alloc(env,&pp,0);
+                       assert(err == 0);
+                       err = page_insert(env->env_pgdir,pp,va,perms);
+                       assert(err == 0);
+
+                       pte_t* pte = pgdir_walk(e->env_pgdir,va,0);
+                       assert(pte);
+                       pagecopy(page2kva(pp),ppn2kva(PTE2PPN(*pte)));
+               }
+       }
+
+       __proc_set_state(env, PROC_RUNNABLE_S);
+       schedule_proc(env);
+
+       // don't decref the new process.
+       // that will happen when the parent waits for it.
+
+       printd("[PID %d] fork PID %d\n",e->pid,env->pid);
+
+       return env->pid;
+}
+
+/* Non-blocking wait on a child process.
+ *
+ * If pid names a process whose ppid is the caller's pid and which has
+ * reached PROC_DYING, its exit code is copied to the user pointer status
+ * and 0 is returned.  Otherwise returns -1 with errno set to
+ *   0 - the child exists but is not PROC_DYING yet
+ *   1 - no such pid, or pid is not a child of the caller
+ * A caller that is not in PROC_RUNNING_S gets -1 with errno untouched. */
+static ssize_t sys_trywait(env_t* e, pid_t pid, int* status)
+{
+       // TODO: this syscall is racy, so we only support it for single-core procs.
+       // This check must come before pid2proc(): the lookup returns a
+       // reference, which would leak on this early return.
+       if(e->state != PROC_RUNNING_S)
+               return -1;
+
+       struct proc* p = pid2proc(pid);
+
+       // TODO: need to use errno properly.  sadly, ROS error codes conflict..
+
+       if(p)
+       {
+               ssize_t ret;
+
+               if(current->pid == p->ppid)
+               {
+                       if(p->state == PROC_DYING)
+                       {
+                               memcpy_to_user(e,status,&p->exitcode,sizeof(int));
+                               printd("[PID %d] waited for PID %d (code %d)\n",
+                                      e->pid,p->pid,p->exitcode);
+                               ret = 0;
+                       }
+                       else // not dead yet
+                       {
+                               set_errno(current_tf,0);
+                               ret = -1;
+                       }
+               }
+               else // not a child of the calling process
+               {
+                       set_errno(current_tf,1);
+                       ret = -1;
+               }
+
+               // always drop the pid2proc() reference; if the wait succeeded,
+               // also drop the one sys_fork() left on the child
+               proc_decref(p,1 + (ret == 0));
+               return ret;
+       }
+
+       set_errno(current_tf,1);
+       return -1;
+}
+
+/* Replace the calling process's image in place.
+ *
+ * Copies the argument and environment blocks from user space into the
+ * process's procinfo, stages the new binary in a kernel buffer, frees the
+ * [0,heap_top) and stack segments, loads the new image and resets the
+ * current trapframe.
+ *
+ * Returns 0 on success, or -1 if the caller is not in PROC_RUNNING_S, a
+ * user copy fails, or the staging buffer cannot be allocated. */
+static ssize_t sys_exec(env_t* e, void *DANGEROUS binary_buf, size_t len,
+                        void*DANGEROUS arg, void*DANGEROUS env)
+{
+       void* image;
+       ssize_t retval = -1;
+
+       // TODO: right now we only support exec for single-core processes
+       if(e->state != PROC_RUNNING_S)
+               goto out;
+
+       // argv first, then the environment, straight into procinfo
+       if(memcpy_from_user(e,e->env_procinfo->argv_buf,arg,PROCINFO_MAX_ARGV_SIZE) != 0)
+               goto out;
+       if(memcpy_from_user(e,e->env_procinfo->env_buf,env,PROCINFO_MAX_ENV_SIZE) != 0)
+               goto out;
+
+       // stage the binary in kernel memory before the old segments are freed
+       image = kmalloc(len,0);
+       if(!image)
+               goto out;
+       if(memcpy_from_user(e,image,binary_buf,len) != 0)
+               goto out_free;
+
+       // TODO: this breaks with mmap
+       env_segment_free(e,0,(intptr_t)e->heap_top);
+       env_segment_free(e,(void*)USTACKBOT,USTACKTOP-USTACKBOT);
+
+       env_load_icode(e,NULL,image,len);
+       proc_init_trapframe(current_tf,0);
+       retval = 0;
+
+out_free:
+       kfree(image);
+out:
+       return retval;
+}
+
+static ssize_t sys_run_binary(env_t* e, void *DANGEROUS binary_buf, size_t len,
+                              void*DANGEROUS arg, size_t num_colors) // create a process from a caller-supplied binary, schedule it, then yield the caller; always returns 0
+{
+       env_t* env = proc_create(NULL,0); // new process created without an image; the binary is loaded below
+       assert(env != NULL);
 
-       uint8_t* new_binary = kmalloc(len, 0);
-       if(new_binary == NULL)
-               return -ENOMEM;
-       memcpy(new_binary, checked_binary_buf, len);
+       static_assert(PROCINFO_NUM_PAGES == 1);
+       assert(memcpy_from_user(e,env->env_procinfo->argv_buf,arg,PROCINFO_MAX_ARGV_SIZE) == ESUCCESS); // NOTE(review): a failed copy of the caller's argv trips this assert instead of returning an error
+       *(intptr_t*)env->env_procinfo->env_buf = 0; // write a zero word at the start of the child's environment buffer
 
-       env_t* env = proc_create(new_binary, len);
-       kfree(new_binary);
+       env_load_icode(env,e,binary_buf,len); // binary_buf is passed on together with the calling proc e; presumably read from e's address space -- confirm
        __proc_set_state(env, PROC_RUNNABLE_S);
        schedule_proc(env);
+       if(num_colors > 0) { // NOTE(review): colors are assigned after the child has already been scheduled above
+               env->cache_colors_map = cache_colors_map_alloc();
+               for(int i=0; i<num_colors; i++)
+                       cache_color_alloc(llc_cache, env->cache_colors_map); // one allocation call per requested color
+       }
        proc_decref(env, 1);
+       proc_yield(e); // the caller yields once the child has been set up
        return 0;
 }
 
@@ -186,19 +320,19 @@ static ssize_t sys_shared_page_alloc(env_t* p1,
 
        void * COUNT(1) * COUNT(1) addr = user_mem_assert(p1, _addr, sizeof(void *),
                                                       PTE_USER_RW);
-       env_t* p2 = pid2proc(p2_id);
+       struct proc *p2 = pid2proc(p2_id);
        if (!p2)
                return -EBADPROC;
 
        page_t* page;
-       error_t e = page_alloc(&page);
+       error_t e = upage_alloc(p1, &page,1);
        if (e < 0) {
                proc_decref(p2, 1);
                return e;
        }
 
        void* p2_addr = page_insert_in_range(p2->env_pgdir, page,
-                                            (void*SNT)UTEXT, (void*SNT)UTOP, p2_flags);
+                       (void*SNT)UTEXT, (void*SNT)UTOP, p2_flags);
        if (p2_addr == NULL) {
                page_free(page);
                proc_decref(p2, 1);
@@ -206,7 +340,7 @@ static ssize_t sys_shared_page_alloc(env_t* p1,
        }
 
        void* p1_addr = page_insert_in_range(p1->env_pgdir, page,
-                                           (void*SNT)UTEXT, (void*SNT)UTOP, p1_flags);
+                       (void*SNT)UTEXT, (void*SNT)UTOP, p1_flags);
        if(p1_addr == NULL) {
                page_remove(p2->env_pgdir, p2_addr);
                page_free(page);
@@ -274,7 +408,7 @@ static void sys_cache_buster(struct proc *p, uint32_t num_writes,
        if (num_pages) {
                spin_lock(&buster_lock);
                for (int i = 0; i < MIN(num_pages, MAX_PAGES); i++) {
-                       page_alloc(&a_page[i]);
+                       upage_alloc(p, &a_page[i],1);
                        page_insert(p->env_pgdir, a_page[i], (void*)INSERT_ADDR + PGSIZE*i,
                                    PTE_USER_RW);
                }
@@ -345,11 +479,25 @@ static uint32_t sys_getcpuid(void)
        return core_id();
 }
 
+// TODO: Temporary hack until thread-local storage is implemented on i386
+/* Return the caller's virtual core id: 0 for a PROC_RUNNING_S process,
+ * otherwise the index in e->vcoremap whose entry equals core_id().
+ * Panics if no entry matches. */
+static size_t sys_getvcoreid(env_t* e)
+{
+       size_t vcore = 0;
+
+       // a process in PROC_RUNNING_S always reports vcore 0
+       if(e->state == PROC_RUNNING_S)
+               return 0;
+
+       while(vcore < e->num_vcores)
+       {
+               if(e->vcoremap[vcore] == core_id())
+                       return vcore;
+               vcore++;
+       }
+
+       panic("virtual core id not found in sys_getvcoreid()!");
+}
+
 /* Destroy proc pid.  If this is called by the dying process, it will never
  * return.  o/w it will return 0 on success, or an error.  Errors include:
  * - EBADPROC: if there is no such process with pid
  * - EPERM: if caller does not control pid */
-static error_t sys_proc_destroy(struct proc *p, pid_t pid)
+static error_t sys_proc_destroy(struct proc *p, pid_t pid, int exitcode)
 {
        error_t r;
        struct proc *p_to_die = pid2proc(pid);
@@ -362,8 +510,9 @@ static error_t sys_proc_destroy(struct proc *p, pid_t pid)
        }
        if (p_to_die == p) {
                // syscall code and pid2proc both have edible references, only need 1.
+               p->exitcode = exitcode;
                proc_decref(p, 1);
-               printk("[PID %d] proc exiting gracefully\n", p->pid);
+               printd("[PID %d] proc exiting gracefully (code %d)\n", p->pid,exitcode);
        } else {
                panic("Destroying other processes is not supported yet.");
                //printk("[%d] destroying proc %d\n", p->pid, p_to_die->pid);
@@ -433,6 +582,26 @@ static error_t sys_proc_run(struct proc *p, unsigned pid)
        return retval;
 }
 
+/* Move the top of the process heap to addr.
+ *
+ * addr must lie in [heap_bottom, USTACKBOT); otherwise -EINVAL is returned.
+ * Moving up allocates the segment between the old top and addr, moving
+ * down frees the segment between addr and the old top.  Returns ESUCCESS
+ * otherwise, including when addr already equals the current top. */
+static error_t sys_brk(struct proc *p, void* addr) {
+       size_t delta;
+
+       // reject targets below the heap or at/above the stack bottom
+       if(!((addr >= p->heap_bottom) && (addr < (void*)USTACKBOT)))
+               return -EINVAL;
+
+       if(addr != p->heap_top) {
+               if(addr > p->heap_top) {
+                       // heap grows
+                       delta = addr - p->heap_top;
+                       env_segment_alloc(p, p->heap_top, delta);
+               } else {
+                       // heap shrinks
+                       delta = p->heap_top - addr;
+                       env_segment_free(p, addr, delta);
+               }
+               p->heap_top = addr;
+       }
+       return ESUCCESS;
+}
+
 /* Executes the given syscall.
  *
  * Note tf is passed in, which points to the tf of the context on the kernel
@@ -482,10 +651,12 @@ intreg_t syscall(struct proc *p, uintreg_t syscallno, uintreg_t a1,
                        return sys_cgetc(p); // this will need to block
                case SYS_getcpuid:
                        return sys_getcpuid();
+               case SYS_getvcoreid:
+                       return sys_getvcoreid(p);
                case SYS_getpid:
                        return sys_getpid(p);
                case SYS_proc_destroy:
-                       return sys_proc_destroy(p, (pid_t)a1);
+                       return sys_proc_destroy(p, (pid_t)a1, (int)a2);
                case SYS_yield:
                        proc_yield(p);
                        return ESUCCESS;
@@ -503,8 +674,7 @@ intreg_t syscall(struct proc *p, uintreg_t syscallno, uintreg_t a1,
                        _a6 = args[2];
                        return (intreg_t) mmap(p, a1, a2, a3, _a4, _a5, _a6);
                case SYS_brk:
-                       printk("brk not implemented yet\n");
-                       return -EINVAL;
+                       return sys_brk(p, (void*)a1);
                case SYS_resource_req:
                        return resource_req(p, a1, a2, a3, a4);
 
@@ -515,8 +685,7 @@ intreg_t syscall(struct proc *p, uintreg_t syscallno, uintreg_t a1,
                        return sys_serial_read(p, (char *DANGEROUS)a1, (size_t)a2);
        #endif
                case SYS_run_binary:
-                       return sys_run_binary(p, (char *DANGEROUS)a1,
-                                             (char* DANGEROUS)a2, (size_t)a3);
+                       return sys_run_binary(p, (char *DANGEROUS)a1, (size_t)a2, (void* DANGEROUS)a3, (size_t)a4);
        #ifdef __NETWORK__
                case SYS_eth_write:
                        return sys_eth_write(p, (char *DANGEROUS)a1, (size_t)a2);
@@ -525,9 +694,22 @@ intreg_t syscall(struct proc *p, uintreg_t syscallno, uintreg_t a1,
        #endif
        #ifdef __sparc_v8__
                case SYS_frontend:
-                       return frontend_syscall_from_user(p,a1,a2,a3,a4);
+                       return frontend_syscall_from_user(p,a1,a2,a3,a4,a5);
        #endif
 
+               case SYS_reboot:
+                       reboot();
+                       return 0;
+
+               case SYS_fork:
+                       return sys_fork(p);
+
+               case SYS_trywait:
+                       return sys_trywait(p,(pid_t)a1,(int*)a2);
+
+               case SYS_exec:
+                       return sys_exec(p, (char *DANGEROUS)a1, (size_t)a2, (void* DANGEROUS)a3, (void* DANGEROUS)a4);
+
                default:
                        // or just return -EINVAL
                        panic("Invalid syscall number %d for proc %x!", syscallno, *p);