Debug code to see remote kmsgs
[akaros.git] / kern / src / elf.c
index d162069..6cf23b1 100644 (file)
 # define elf_field(obj, field) ((obj##32)->field)
 #endif
 
-static int load_one_elf(struct proc *p, struct file *f, int pgoffset,
-                        elf_info_t *ei)
+/* We need the writable flag for ld.  Even though the elf header says it wants
+ * RX (and not W) for its main program header, it will page fault (eip 56f0,
+ * 46f0 after being relocated to 0x1000, va 0x20f4). */
+static int load_one_elf(struct proc *p, struct file *f, uintptr_t pgoffset,
+                        elf_info_t *ei, bool writable)
 {
        int ret = -1;
        ei->phdr = -1;
        ei->dynamic = 0;
        ei->highest_addr = 0;
        off_t f_off = 0;
-
-       /* assume program headers fit in a page.  if this isn't true, change the
-        * code below that maps in program headers */
-       char* elf = (char*)kmalloc(PGSIZE, 0);
+       void* phdrs = 0;
+       int mm_perms, mm_flags = MAP_FIXED;
        
        /* When reading on behalf of the kernel, we need to make sure no proc is
         * "current".  This is a bit ghetto (TODO: KFOP) */
        struct proc *cur_proc = current;
        current = 0;
-       if (!elf || f->f_op->read(f, elf, PGSIZE, &f_off) == -1)
+
+       /* Read in ELF header. */
+       elf64_t elfhdr_storage;
+       elf32_t* elfhdr32 = (elf32_t*)&elfhdr_storage;
+       elf64_t* elfhdr64 = &elfhdr_storage;
+       if (f->f_op->read(f, (char*)elfhdr64, sizeof(elf64_t), &f_off) == -1)
                goto fail;
-       current = cur_proc;
 
-       elf32_t* elfhdr32 = (elf32_t*)elf;
-       elf64_t* elfhdr64 = (elf64_t*)elf;
        bool elf32 = elfhdr32->e_ident[ELF_IDENT_CLASS] == ELFCLASS32;
-       bool elf64 = elfhdr32->e_ident[ELF_IDENT_CLASS] == ELFCLASS64;
-       if (!elf64 && !elf32)
+       bool elf64 = elfhdr64->e_ident[ELF_IDENT_CLASS] == ELFCLASS64;
+       if (elf64 == elf32)
                goto fail;
        #ifndef KERN64
        if(elf64)
                goto fail;
        #endif
-       
-       proghdr32_t* proghdrs32 = (proghdr32_t*)(elf + elfhdr32->e_phoff);
-       proghdr64_t* proghdrs64 = (proghdr64_t*)(elf + elfhdr64->e_phoff);
-       uintptr_t e_phoff = elf_field(elfhdr, e_phoff);
+
        size_t phsz = elf64 ? sizeof(proghdr64_t) : sizeof(proghdr32_t);
        uint16_t e_phnum = elf_field(elfhdr, e_phnum);
-       // we don't support prog hdrs extending past the first elf page
-       if (e_phoff + e_phnum * phsz > PGSIZE)
+       uint16_t e_phoff = elf_field(elfhdr, e_phoff);
+
+       /* Read in program headers. */
+       if (e_phnum > 10000 || e_phoff % (elf32 ? 4 : 8) != 0)
+         goto fail;
+       phdrs = kmalloc(e_phnum * phsz, 0);
+       f_off = e_phoff;
+       if (!phdrs || f->f_op->read(f, phdrs, e_phnum * phsz, &f_off) == -1)
                goto fail;
-
        for (int i = 0; i < e_phnum; i++) {
-               proghdr32_t* ph32 = proghdrs32+i;
-               proghdr64_t* ph64 = proghdrs64+i;
+               proghdr32_t* ph32 = (proghdr32_t*)phdrs + i;
+               proghdr64_t* ph64 = (proghdr64_t*)phdrs + i;
                uint16_t p_type = elf_field(ph, p_type);
                uintptr_t p_va = elf_field(ph, p_va);
                uintptr_t p_offset = elf_field(ph, p_offset);
                uintptr_t p_align = elf_field(ph, p_align);
                uintptr_t p_memsz = elf_field(ph, p_memsz);
                uintptr_t p_filesz = elf_field(ph, p_filesz);
+               uintptr_t p_flags = elf_field(ph, p_flags);
+
+               /* Here's the ld hack, mentioned above */
+               p_flags |= (writable ? ELF_PROT_WRITE : 0);
+               /* All mmaps need to be fixed to their VAs.  If the program wants it to
+                * be a writable region, we also need the region to be private. */
+               mm_flags = MAP_FIXED | (p_flags & ELF_PROT_WRITE ? MAP_PRIVATE : 0);
 
                if (p_type == ELF_PROG_PHDR)
-                       ei->phdr = elf_field(ph, p_va);
-               if (p_type == ELF_PROG_INTERP) {
-                       int maxlen = MIN(PGSIZE - p_offset, sizeof(ei->interp));
-                       int len = strnlen(elf + p_offset, maxlen);
-                       if (len < maxlen) {
-                               memcpy(ei->interp, elf + p_offset, maxlen + 1);
-                               ei->dynamic = 1;
-                       }
-                       else
-                               goto fail;
+                       ei->phdr = p_va;
+               else if (p_type == ELF_PROG_INTERP) {
+                       f_off = p_offset;
+                       ssize_t maxlen = sizeof(ei->interp);
+                       ssize_t bytes = f->f_op->read(f, ei->interp, maxlen, &f_off);
+                       if (bytes == -1)
+                         goto fail;
+
+                       maxlen = MIN(maxlen, bytes);
+                       if (strnlen(ei->interp, maxlen) == maxlen)
+                         goto fail;
+
+                       ei->dynamic = 1;
                }
-
-               if (p_type == ELF_PROG_LOAD && p_memsz) {
+               else if (p_type == ELF_PROG_LOAD && p_memsz) {
                        if (p_align % PGSIZE)
                                goto fail;
                        if (p_offset % PGSIZE != p_va % PGSIZE)
                                goto fail;
 
                        uintptr_t filestart = ROUNDDOWN(p_offset, PGSIZE);
-                       uintptr_t fileend = p_offset + p_filesz;
-                       uintptr_t filesz = fileend - filestart;
+                       uintptr_t filesz = p_offset + p_filesz - filestart;
 
                        uintptr_t memstart = ROUNDDOWN(p_va, PGSIZE);
-                       uintptr_t memend = ROUNDUP(p_va + p_memsz, PGSIZE);
-                       uintptr_t memsz = memend - memstart;
-                       if (memend > ei->highest_addr)
-                               ei->highest_addr = memend;
-                       /* This needs to be a PRIVATE mapping, and the stuff after the file
-                        * needs to be zeroed. */
+                       uintptr_t memsz = ROUNDUP(p_va + p_memsz, PGSIZE) - memstart;
+                       memstart += pgoffset * PGSIZE;
+
+                       if (memstart + memsz > ei->highest_addr)
+                               ei->highest_addr = memstart + memsz;
+
+                       mm_perms = 0;
+                       mm_perms |= (p_flags & ELF_PROT_READ  ? PROT_READ : 0);
+                       mm_perms |= (p_flags & ELF_PROT_WRITE ? PROT_WRITE : 0);
+                       mm_perms |= (p_flags & ELF_PROT_EXEC  ? PROT_EXEC : 0);
+
                        if (filesz) {
-                               /* TODO: figure out proper permissions from the elf */
-                               if (do_mmap(p, memstart + pgoffset * PGSIZE, filesz,
-                                          PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE,
-                                          f, filestart) == MAP_FAILED)
-                                       goto fail;
-                               /* Due to elf-ghetto-ness, we need to zero the first part of the
-                                * BSS from the last page of the data segment.  We translate to
-                                * the KVA so we don't need to worry about using the proc's
-                                * mapping */
-                               uintptr_t z_s = memstart + pgoffset * PGSIZE + filesz;
-                               pte_t *pte = pgdir_walk(p->env_pgdir, (void*)z_s, 0);
-                               assert(pte);
-                               uintptr_t kva_z_s = (uintptr_t)ppn2kva(PTE2PPN(*pte)) + PGOFF(z_s);
-                               uintptr_t kva_z_e = ROUNDUP(kva_z_s, PGSIZE);
-                               memset((void*)kva_z_s, 0, kva_z_e - kva_z_s);
-                               filesz = ROUNDUP(filesz, PGSIZE);
+                               /* Due to elf-ghetto-ness, we need to zero the first part of
+                                * the BSS from the last page of the data segment.  If we end
+                                * on a partial page, we map it in separately with
+                                * MAP_POPULATE so that we can zero the rest of it now. We
+                                * translate to the KVA so we don't need to worry about using
+                                * the proc's mapping */
+                               uintptr_t partial = PGOFF(filesz);
+
+                               if (filesz - partial) {
+                                       /* Map the complete pages. */
+                                       if (do_mmap(p, memstart, filesz - partial, mm_perms,
+                                                   mm_flags, f, filestart) == MAP_FAILED)
+                                               goto fail;
+                               }
+                               /* Note that we (probably) only need to zero the end of a
+                                * partial file page when we are dealing with
+                                * ELF_PROT_WRITE-able PHs, and not for all cases. */
+                               if (partial) {
+                                       /* Need our own populated, private copy of the page so that
+                                        * we can zero the remainder - and not zero chunks of the
+                                        * real file in the page cache. */
+                                       mm_flags |= MAP_PRIVATE | MAP_POPULATE;
+
+                                       /* Map the final partial page. */
+                                       uintptr_t last_page = memstart + filesz - partial;
+                                       if (do_mmap(p, last_page, PGSIZE, mm_perms, mm_flags,
+                                                   f, filestart + filesz - partial) == MAP_FAILED)
+                                               goto fail;
+
+                                       /* Zero the end of it. */
+                                       pte_t *pte = pgdir_walk(p->env_pgdir, (void*)last_page, 0);
+                                       assert(pte);
+                                       void* last_page_kva = ppn2kva(PTE2PPN(*pte));
+                                       memset(last_page_kva + partial, 0, PGSIZE - partial);
+
+                                       filesz = ROUNDUP(filesz, PGSIZE);
+                               }
                        }
                        /* Any extra pages are mapped anonymously... (a bit weird) */
                        if (filesz < memsz)
-                               if (do_mmap(p, memstart + filesz + pgoffset*PGSIZE, memsz-filesz,
-                                          PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_ANON,
-                                          NULL, 0) == MAP_FAILED)
+                               if (do_mmap(p, memstart + filesz, memsz-filesz,
+                                           PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                                               NULL, 0) == MAP_FAILED)
                                        goto fail;
                }
        }
        /* map in program headers anyway if not present in binary.
         * useful for TLS in static programs. */
        if (ei->phdr == -1) {
-               void *phdr_addr = do_mmap(p, MMAP_LOWEST_VA, PGSIZE, PROT_READ, 0, f,
-                                         0);
+               uintptr_t filestart = ROUNDDOWN(e_phoff, PGSIZE);
+               uintptr_t filesz = e_phoff + (e_phnum * phsz) - filestart;
+               void *phdr_addr = do_mmap(p, 0, filesz, PROT_READ | PROT_WRITE,
+                                         MAP_PRIVATE, f, filestart);
                if (phdr_addr == MAP_FAILED)
                        goto fail;
                ei->phdr = (long)phdr_addr + e_phoff;
@@ -134,23 +175,28 @@ static int load_one_elf(struct proc *p, struct file *f, int pgoffset,
        ei->phnum = e_phnum;
        ei->elf64 = elf64;
        ret = 0;
+       goto out;
 fail:
-       kfree(elf);
+       printk("[kernel] Load failed during loadelf of file %s!\n", file_name(f));
+out:
+       if (phdrs)
+               kfree(phdrs);
+       current = cur_proc;
        return ret;
 }
 
 int load_elf(struct proc* p, struct file* f)
 {
        elf_info_t ei, interp_ei;
-       if (load_one_elf(p, f, 0,& ei))
+       if (load_one_elf(p, f, 0, &ei, FALSE))
                return -1;
 
        if (ei.dynamic) {
                struct file *interp = do_file_open(ei.interp, 0, 0);
                if (!interp)
                        return -1;
-               /* careful, this could conflict with the mmap from the TLS up above */
-               int error = load_one_elf(p, interp, 2, &interp_ei);
+               /* Load dynamic linker one page into the address space */
+               int error = load_one_elf(p, interp, 1, &interp_ei, TRUE);
                kref_put(&interp->f_kref);
                if (error)
                        return -1;
@@ -181,10 +227,13 @@ int load_elf(struct proc* p, struct file* f)
        proc_init_trapframe(&p->env_tf,0,core0_entry,USTACKTOP);
        p->env_entry = ei.entry;
 
-       // map in stack using POPULATE (because SPARC requires it)
+       int flags = MAP_FIXED | MAP_ANONYMOUS;
+       #ifdef __sparc_v8__
+       flags |= MAP_POPULATE; // SPARC stacks must be mapped in
+       #endif
        uintptr_t stacksz = USTACK_NUM_PAGES*PGSIZE;
        if (do_mmap(p, USTACKTOP-stacksz, stacksz, PROT_READ | PROT_WRITE,
-                   MAP_FIXED | MAP_ANONYMOUS | MAP_POPULATE, NULL, 0) == MAP_FAILED)
+                   flags, NULL, 0) == MAP_FAILED)
                return -1;
 
        // Set the heap bottom and top to just past where the text