Fixes MAP_PRIVATE bug in mmap()
[akaros.git] / kern / src / elf.c
1 #include <mm.h>
2 #include <frontend.h>
3 #include <string.h>
4 #include <kmalloc.h>
5 #include <syscall.h>
6 #include <elf.h>
7 #include <pmap.h>
8 #include <smp.h>
9 #include <arch/arch.h>
10
/* elf_field(obj, field): read 'field' from an ELF structure that is reachable
 * through a pair of local pointers named <obj>32 and <obj>64.
 *
 * On KERN64 builds the macro consults a local boolean named 'elf64' to choose
 * which of the two pointers to dereference; on all other builds only the
 * <obj>32 pointer is ever used.  The caller must have those identifiers in
 * scope at the point of use. */
#if defined(KERN64)
# define elf_field(obj, field) (elf64 ? (obj##64)->field : (obj##32)->field)
#else /* !KERN64 */
# define elf_field(obj, field) ((obj##32)->field)
#endif /* KERN64 */
16
17 static int load_one_elf(struct proc *p, struct file *f, uintptr_t pgoffset,
18                         elf_info_t *ei)
19 {
20         int ret = -1;
21         ei->phdr = -1;
22         ei->dynamic = 0;
23         ei->highest_addr = 0;
24         off_t f_off = 0;
25         void* phdrs = 0;
26         
27         /* When reading on behalf of the kernel, we need to make sure no proc is
28          * "current".  This is a bit ghetto (TODO: KFOP) */
29         struct proc *cur_proc = current;
30         current = 0;
31
32         /* Read in ELF header. */
33         elf64_t elfhdr_storage;
34         elf32_t* elfhdr32 = (elf32_t*)&elfhdr_storage;
35         elf64_t* elfhdr64 = &elfhdr_storage;
36         if (f->f_op->read(f, (char*)elfhdr64, sizeof(elf64_t), &f_off) == -1)
37                 goto fail;
38
39         bool elf32 = elfhdr32->e_ident[ELF_IDENT_CLASS] == ELFCLASS32;
40         bool elf64 = elfhdr64->e_ident[ELF_IDENT_CLASS] == ELFCLASS64;
41         if (elf64 == elf32)
42                 goto fail;
43         #ifndef KERN64
44         if(elf64)
45                 goto fail;
46         #endif
47
48         size_t phsz = elf64 ? sizeof(proghdr64_t) : sizeof(proghdr32_t);
49         uint16_t e_phnum = elf_field(elfhdr, e_phnum);
50         uint16_t e_phoff = elf_field(elfhdr, e_phoff);
51
52         /* Read in program headers. */
53         if (e_phnum > 10000 || e_phoff % (elf32 ? 4 : 8) != 0)
54           goto fail;
55         phdrs = kmalloc(e_phnum * phsz, 0);
56         f_off = e_phoff;
57         if (!phdrs || f->f_op->read(f, phdrs, e_phnum * phsz, &f_off) == -1)
58                 goto fail;
59
60         int flags = MAP_FIXED | MAP_PRIVATE;    /* TODO: why private? */
61
62         for (int i = 0; i < e_phnum; i++) {
63                 proghdr32_t* ph32 = (proghdr32_t*)phdrs + i;
64                 proghdr64_t* ph64 = (proghdr64_t*)phdrs + i;
65                 uint16_t p_type = elf_field(ph, p_type);
66                 uintptr_t p_va = elf_field(ph, p_va);
67                 uintptr_t p_offset = elf_field(ph, p_offset);
68                 uintptr_t p_align = elf_field(ph, p_align);
69                 uintptr_t p_memsz = elf_field(ph, p_memsz);
70                 uintptr_t p_filesz = elf_field(ph, p_filesz);
71
72                 if (p_type == ELF_PROG_PHDR)
73                         ei->phdr = p_va;
74                 else if (p_type == ELF_PROG_INTERP) {
75                         f_off = p_offset;
76                         ssize_t maxlen = sizeof(ei->interp);
77                         ssize_t bytes = f->f_op->read(f, ei->interp, maxlen, &f_off);
78                         if (bytes == -1)
79                           goto fail;
80
81                         maxlen = MIN(maxlen, bytes);
82                         if (strnlen(ei->interp, maxlen) == maxlen)
83                           goto fail;
84
85                         ei->dynamic = 1;
86                 }
87                 else if (p_type == ELF_PROG_LOAD && p_memsz) {
88                         if (p_align % PGSIZE)
89                                 goto fail;
90                         if (p_offset % PGSIZE != p_va % PGSIZE)
91                                 goto fail;
92
93                         uintptr_t filestart = ROUNDDOWN(p_offset, PGSIZE);
94                         uintptr_t filesz = p_offset + p_filesz - filestart;
95
96                         uintptr_t memstart = ROUNDDOWN(p_va, PGSIZE);
97                         uintptr_t memsz = ROUNDUP(p_va + p_memsz, PGSIZE) - memstart;
98                         memstart += pgoffset * PGSIZE;
99
100                         if (memstart + memsz > ei->highest_addr)
101                                 ei->highest_addr = memstart + memsz;
102
103                         /* TODO: figure out proper permissions from the elf */
104                         int perms = PROT_READ | PROT_WRITE | PROT_EXEC;
105
106                         if (filesz) {
107                                 /* Due to elf-ghetto-ness, we need to zero the first part of
108                                  * the BSS from the last page of the data segment.  If we end
109                                  * on a partial page, we map it in separately with
110                                  * MAP_POPULATE so that we can zero the rest of it now. We
111                                  * translate to the KVA so we don't need to worry about using
112                                  * the proc's mapping */
113                                 uintptr_t partial = PGOFF(filesz);
114
115                                 if (filesz - partial) {
116                                         /* Map the complete pages. */
117                                         if (do_mmap(p, memstart, filesz - partial, perms, flags,
118                                                     f, filestart) == MAP_FAILED)
119                                                 goto fail;
120                                 }
121
122                                 if (partial) {
123                                         /* Map the final partial page. */
124                                         uintptr_t last_page = memstart + filesz - partial;
125                                         int partial_flags = flags | MAP_POPULATE;
126                                         if (do_mmap(p, last_page, PGSIZE, perms, partial_flags,
127                                                     f, filestart + filesz - partial) == MAP_FAILED)
128                                                 goto fail;
129
130                                         /* Zero the end of it. */
131                                         pte_t *pte = pgdir_walk(p->env_pgdir, (void*)last_page, 0);
132                                         assert(pte);
133                                         void* last_page_kva = ppn2kva(PTE2PPN(*pte));
134                                         memset(last_page_kva + partial, 0, PGSIZE - partial);
135
136                                         filesz = ROUNDUP(filesz, PGSIZE);
137                                 }
138                         }
139                         /* Any extra pages are mapped anonymously... (a bit weird) */
140                         if (filesz < memsz)
141                                 if (do_mmap(p, memstart + filesz, memsz-filesz,
142                                            perms, flags, NULL, 0) == MAP_FAILED)
143                                         goto fail;
144                 }
145         }
146         /* map in program headers anyway if not present in binary.
147          * useful for TLS in static programs. */
148         if (ei->phdr == -1) {
149                 uintptr_t filestart = ROUNDDOWN(e_phoff, PGSIZE);
150                 uintptr_t filesz = e_phoff + (e_phnum * phsz) - filestart;
151                 void *phdr_addr = do_mmap(p, 0, filesz, PROT_READ,
152                                           flags & ~MAP_FIXED, f, filestart);
153                 if (phdr_addr == MAP_FAILED)
154                         goto fail;
155                 ei->phdr = (long)phdr_addr + e_phoff;
156         }
157         ei->entry = elf_field(elfhdr, e_entry) + pgoffset*PGSIZE;
158         ei->phnum = e_phnum;
159         ei->elf64 = elf64;
160         ret = 0;
161 fail:
162         if (phdrs)
163           kfree(phdrs);
164         current = cur_proc;
165         return ret;
166 }
167
168 int load_elf(struct proc* p, struct file* f)
169 {
170         elf_info_t ei, interp_ei;
171         if (load_one_elf(p, f, 0, &ei))
172                 return -1;
173
174         if (ei.dynamic) {
175                 struct file *interp = do_file_open(ei.interp, 0, 0);
176                 if (!interp)
177                         return -1;
178                 /* Load dynamic linker one page into the address space */
179                 int error = load_one_elf(p, interp, 1, &interp_ei);
180                 kref_put(&interp->f_kref);
181                 if (error)
182                         return -1;
183         }
184
185         // fill in auxiliary info for dynamic linker/runtime
186         elf_aux_t auxp[] = {{ELF_AUX_PHDR, ei.phdr},
187                             {ELF_AUX_PHENT, sizeof(proghdr32_t)},
188                             {ELF_AUX_PHNUM, ei.phnum},
189                             {ELF_AUX_ENTRY, ei.entry},
190                             #ifdef __sparc_v8__
191                             {ELF_AUX_HWCAP, ELF_HWCAP_SPARC_FLUSH},
192                             #endif
193                             {0, 0}};
194
195         // put auxp after argv, envp in procinfo
196         int auxp_pos = -1;
197         for (int i = 0, zeros = 0; i < PROCINFO_MAX_ARGP; i++)
198                 if (p->procinfo->argp[i] == NULL)
199                         if (++zeros == 2)
200                                 auxp_pos = i + 1;
201         if (auxp_pos == -1 ||
202             auxp_pos + sizeof(auxp) / sizeof(char*) >= PROCINFO_MAX_ARGP)
203                 return -1;
204         memcpy(p->procinfo->argp+auxp_pos,auxp,sizeof(auxp));
205
206         uintptr_t core0_entry = ei.dynamic ? interp_ei.entry : ei.entry;
207         proc_init_trapframe(&p->env_tf,0,core0_entry,USTACKTOP);
208         p->env_entry = ei.entry;
209
210         int flags = MAP_FIXED | MAP_ANONYMOUS;
211         #ifdef __sparc_v8__
212         flags |= MAP_POPULATE; // SPARC stacks must be mapped in
213         #endif
214         uintptr_t stacksz = USTACK_NUM_PAGES*PGSIZE;
215         if (do_mmap(p, USTACKTOP-stacksz, stacksz, PROT_READ | PROT_WRITE,
216                     flags, NULL, 0) == MAP_FAILED)
217                 return -1;
218
219         // Set the heap bottom and top to just past where the text 
220         // region has been loaded
221         p->heap_top = (void*)ei.highest_addr;
222         p->procinfo->heap_bottom = p->heap_top;
223
224         return 0;
225 }
226