/* Accessor for ELF header/program-header fields that differ between the
 * 32-bit and 64-bit struct layouts.  These two definitions are presumably
 * the arms of an elided 64-bit-support #if/#else (guard lines not shown) --
 * TODO confirm.  The first selects at runtime via a local 'elf64' flag in
 * the calling scope; the second (32-bit-only build) always takes the 32-bit
 * view.  'obj' is pasted with the 64/32 suffix, so callers must have both
 * obj##64 and obj##32 pointers in scope (see load_one_elf). */
12 # define elf_field(obj, field) (elf64 ? (obj##64)->field : (obj##32)->field)
14 # define elf_field(obj, field) ((obj##32)->field)
17 /* Check if the file is valid elf file (i.e. by checking for ELF_MAGIC in the
19 bool is_valid_elf(struct file *f)
/* Kernel-initiated read: switch to a ktask so the VFS knows (same pattern as
 * load_one_elf below).  'h' and 'o' are declared on elided lines -- presumably
 * an elf64_t header buffer and a file offset; TODO confirm. */
23 uintptr_t c = switch_to_ktask();
/* Short read => cannot be a valid ELF; error path continues on elided lines. */
25 if (f->f_op->read(f, (char*)&h, sizeof(elf64_t), &o) != sizeof(elf64_t)) {
/* Magic-number check. */
28 if (h.e_magic != ELF_MAGIC) {
/* Both the failure path and the success path restore the previous task
 * context before returning. */
32 switch_back_from_ktask(c);
35 switch_back_from_ktask(c);
/* Build the initial user stack for p: maps an anonymous stack VMR, then lays
 * out argc, argv[], envp[], auxv[], and the argument/environment strings in
 * their final locations just below USTACKTOP.  Returns the resulting 16-byte
 * aligned stack top (USTACKTOP - bufsize); error returns are on elided lines.
 * get_lens() and remap() capture variables from this frame (p, offset), i.e.
 * they are GCC nested functions -- note this file depends on that GNU
 * extension. */
39 static uintptr_t populate_stack(struct proc *p, int argc, char *argv[],
40 int envc, char *envp[],
41 int auxc, elf_aux_t auxv[])
43 /* Map in pages for p's stack. */
44 int flags = MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE;
45 uintptr_t stacksz = USTACK_NUM_PAGES*PGSIZE;
46 if (do_mmap(p, USTACKTOP-stacksz, stacksz, PROT_READ | PROT_WRITE,
47 flags, NULL, 0) == MAP_FAILED)
50 /* Function to get the lengths of the argument and environment strings. */
51 int get_lens(int argc, char *argv[], int arg_lens[])
/* Each length includes the NUL terminator; total is summed (return on an
 * elided line). */
54 for (int i = 0; i < argc; i++) {
55 arg_lens[i] = strlen(argv[i]) + 1;
61 /* Function to help map the argument and environment strings, to their
63 int remap(int argc, char *argv[], char *new_argv[],
64 char new_argbuf[], int arg_lens[])
/* Staging copy of the pointer array; last slot holds the NULL terminator. */
67 char *temp_argv[argc + 1];
68 for(int i = 0; i < argc; i++) {
/* Copy string i into the user-visible string buffer... */
69 if (memcpy_to_user(p, new_argbuf + offset, argv[i], arg_lens[i]))
/* ...and record its final user-space address.  'offset' is captured from
 * the enclosing frame. */
71 temp_argv[i] = new_argbuf + offset;
72 offset += arg_lens[i];
74 temp_argv[argc] = NULL;
/* Publish the pointer array itself (this becomes user argv/envp). */
75 if (memcpy_to_user(p, new_argv, temp_argv, sizeof(temp_argv)))
80 /* Start tracking the size of the buffer necessary to hold all of our data
81 * on the stack. Preallocate space for argc, argv, envp, and auxv in this
84 bufsize += 1 * sizeof(size_t);
85 bufsize += (auxc + 1) * sizeof(elf_aux_t);
86 bufsize += (envc + 1) * sizeof(char**);
87 bufsize += (argc + 1) * sizeof(char**);
89 /* Add in the size of the env and arg strings. */
92 bufsize += get_lens(argc, argv, arg_lens);
93 bufsize += get_lens(envc, envp, env_lens);
95 /* Adjust bufsize so that our buffer will ultimately be 16 byte aligned. */
96 bufsize = ROUNDUP(bufsize, 16);
98 /* Set up pointers to all of the appropriate data regions we map to. */
/* Layout, low to high: argc slot, argv[] (NULL-terminated), envp[]
 * (NULL-terminated), auxv[] (zero-entry-terminated), then the packed
 * string data. */
99 size_t *new_argc = (size_t*)(USTACKTOP - bufsize);
100 char **new_argv = (char**)(new_argc + 1);
101 char **new_envp = new_argv + argc + 1;
102 elf_aux_t *new_auxv = (elf_aux_t*)(new_envp + envc + 1);
103 char *new_argbuf = (char*)(new_auxv + auxc + 1);
105 /* Verify that all data associated with our argv, envp, and auxv arrays
106 * (and any corresponding strings they point to) will fit in the space
108 if (bufsize > ARG_MAX)
111 /* Map argc into its final location. */
/* NOTE(review): argc is declared int, but sizeof(size_t) bytes are copied
 * from &argc -- on LP64 this reads 4 bytes past argc.  Verify against the
 * full source whether argc is widened before this point. */
112 if (memcpy_to_user(p, new_argc, &argc, sizeof(size_t)))
115 /* Map all data for argv and envp into its final location. */
117 offset = remap(argc, argv, new_argv, new_argbuf, arg_lens);
/* env strings are packed immediately after the arg strings. */
120 offset = remap(envc, envp, new_envp, new_argbuf + offset, env_lens);
124 /* Map auxv into its final location. */
125 elf_aux_t null_aux = {0, 0};
126 if (memcpy_to_user(p, new_auxv, auxv, auxc * sizeof(elf_aux_t)))
/* auxv is terminated by an all-zero entry, per the SysV ABI. */
128 if (memcpy_to_user(p, new_auxv + auxc, &null_aux, sizeof(elf_aux_t)))
131 return USTACKTOP - bufsize;
134 /* We need the writable flag for ld. Even though the elf header says it wants
135 * RX (and not W) for its main program header, it will page fault (eip 56f0,
136 * 46f0 after being relocated to 0x1000, va 0x20f4). */
/* Map one ELF image (main binary or interpreter) into p's address space at a
 * page offset of pg_num, filling in *ei (entry, phdr, highest_addr, interp,
 * ...).  Returns 0 on success, nonzero on failure -- the error paths and the
 * return statements are on elided lines; TODO confirm the exact error codes
 * against the full source. */
137 static int load_one_elf(struct proc *p, struct file *f, uintptr_t pg_num,
138 elf_info_t *ei, bool writable)
139 ei->highest_addr = 0;
/* (Renumbering note: the listing elides several declarations here, e.g.
 * phdrs and f_off, used below.) */
146 int mm_perms, mm_flags;
148 /* When reading on behalf of the kernel, we need to switch to a ktask so
149 * the VFS (and maybe other places) know. (TODO: KFOP) */
150 uintptr_t old_ret = switch_to_ktask();
152 /* Read in ELF header. */
/* One storage buffer, viewed through both 32- and 64-bit header types so the
 * elf_field() macro can pick the right layout. */
153 elf64_t elfhdr_storage;
154 elf32_t* elfhdr32 = (elf32_t*)&elfhdr_storage;
155 elf64_t* elfhdr64 = &elfhdr_storage;
156 if (f->f_op->read(f, (char*)elfhdr64, sizeof(elf64_t), &f_off)
157 != sizeof(elf64_t)) {
158 /* if you ever debug this, be sure to 0 out elfhrd_storage in advance */
159 printk("[kernel] load_one_elf: failed to read file\n");
/* e_magic sits at the same offset in both layouts, so the 64-bit view is
 * safe to check before we know the class. */
162 if (elfhdr64->e_magic != ELF_MAGIC) {
163 printk("[kernel] load_one_elf: file is not an elf!\n");
166 bool elf32 = elfhdr32->e_ident[ELF_IDENT_CLASS] == ELFCLASS32;
167 bool elf64 = elfhdr64->e_ident[ELF_IDENT_CLASS] == ELFCLASS64;
/* Exactly one of elf32/elf64 must be set; equal means neither or both. */
168 if (elf64 == elf32) {
169 printk("[kernel] load_one_elf: ID as both 32 and 64 bit\n");
174 printk("[kernel] load_one_elf: 64 bit elf on 32 bit kernel\n");
178 /* Not sure what RISCV's 64 bit kernel can do here, so this check is x86
182 printk("[kernel] load_one_elf: 32 bit elf on 64 bit kernel\n");
187 size_t phsz = elf64 ? sizeof(proghdr64_t) : sizeof(proghdr32_t);
188 uint16_t e_phnum = elf_field(elfhdr, e_phnum);
/* NOTE(review): e_phoff is a file offset (32/64-bit in the ELF header) but
 * is stored here in a uint16_t, silently truncating offsets >= 64K.  Verify
 * against the full source -- this looks like it should be uintptr_t. */
189 uint16_t e_phoff = elf_field(elfhdr, e_phoff);
191 /* Read in program headers. */
/* Sanity: bound phnum, and require natural alignment of the phdr table. */
192 if (e_phnum > 10000 || e_phoff % (elf32 ? 4 : 8) != 0) {
193 printk("[kernel] load_one_elf: Bad program headers\n");
196 phdrs = kmalloc(e_phnum * phsz, 0);
198 if (!phdrs || f->f_op->read(f, phdrs, e_phnum * phsz, &f_off) !=
200 printk("[kernel] load_one_elf: could not get program headers\n");
203 for (int i = 0; i < e_phnum; i++) {
/* Two typed views of entry i; elf_field() selects via the elf64 flag. */
204 proghdr32_t* ph32 = (proghdr32_t*)phdrs + i;
205 proghdr64_t* ph64 = (proghdr64_t*)phdrs + i;
206 uint16_t p_type = elf_field(ph, p_type);
207 uintptr_t p_va = elf_field(ph, p_va);
208 uintptr_t p_offset = elf_field(ph, p_offset);
209 uintptr_t p_align = elf_field(ph, p_align);
210 uintptr_t p_memsz = elf_field(ph, p_memsz);
211 uintptr_t p_filesz = elf_field(ph, p_filesz);
212 uintptr_t p_flags = elf_field(ph, p_flags);
214 /* Here's the ld hack, mentioned above */
215 p_flags |= (writable ? ELF_PROT_WRITE : 0);
216 /* All mmaps need to be fixed to their VAs. If the program wants it to
217 * be a writable region, we also need the region to be private. */
218 mm_flags = MAP_FIXED |
219 (p_flags & ELF_PROT_WRITE ? MAP_PRIVATE : MAP_SHARED);
/* PT_PHDR: records where the phdr table will live (body elided). */
221 if (p_type == ELF_PROG_PHDR)
223 else if (p_type == ELF_PROG_INTERP) {
/* PT_INTERP: read the interpreter path (e.g. the dynamic linker) into
 * ei->interp.  Seek to p_offset presumably happens on an elided line. */
225 ssize_t maxlen = sizeof(ei->interp);
226 ssize_t bytes = f->f_op->read(f, ei->interp, maxlen, &f_off);
227 /* trying to catch errors. don't know how big it could be, but it
228 * should be at least 0. */
230 printk("[kernel] load_one_elf: could not read ei->interp\n");
/* The path must be NUL-terminated within what we actually read. */
234 maxlen = MIN(maxlen, bytes);
235 if (strnlen(ei->interp, maxlen) == maxlen) {
236 printk("[kernel] load_one_elf: interpreter name too long\n");
/* PT_LOAD with nonzero memsz: map the segment. */
242 else if (p_type == ELF_PROG_LOAD && p_memsz) {
243 if (p_align % PGSIZE) {
244 printk("[kernel] load_one_elf: not page aligned\n");
/* mmap requires file offset and VA to be congruent mod PGSIZE. */
247 if (p_offset % PGSIZE != p_va % PGSIZE) {
248 printk("[kernel] load_one_elf: offset difference \n");
/* Page-round the file window and the memory window independently. */
252 uintptr_t filestart = ROUNDDOWN(p_offset, PGSIZE);
253 uintptr_t filesz = p_offset + p_filesz - filestart;
255 uintptr_t memstart = ROUNDDOWN(p_va, PGSIZE);
256 uintptr_t memsz = ROUNDUP(p_va + p_memsz, PGSIZE) - memstart;
/* Relocate by the caller's page offset (used to slide the interpreter). */
257 memstart += pg_num * PGSIZE;
259 if (memstart + memsz > ei->highest_addr)
260 ei->highest_addr = memstart + memsz;
/* Translate ELF segment flags to mmap protections.  mm_perms is
 * presumably reset to 0 on an elided line just above -- TODO confirm. */
263 mm_perms |= (p_flags & ELF_PROT_READ ? PROT_READ : 0);
264 mm_perms |= (p_flags & ELF_PROT_WRITE ? PROT_WRITE : 0);
265 mm_perms |= (p_flags & ELF_PROT_EXEC ? PROT_EXEC : 0);
268 /* Due to elf-ghetto-ness, we need to zero the first part of
269 * the BSS from the last page of the data segment. If we end
270 * on a partial page, we map it in separately with
271 * MAP_POPULATE so that we can zero the rest of it now. We
272 * translate to the KVA so we don't need to worry about using
273 * the proc's mapping */
274 uintptr_t partial = PGOFF(filesz);
276 if (filesz - partial) {
277 /* Map the complete pages. */
278 if (do_mmap(p, memstart, filesz - partial, mm_perms,
279 mm_flags, f, filestart) == MAP_FAILED) {
280 printk("[kernel] load_one_elf: complete mmap failed\n");
284 /* Note that we (probably) only need to do this zeroing the end
285 * of a partial file page when we are dealing with
286 * ELF_PROT_WRITE-able PHs, and not for all cases. */
288 /* Need our own populated, private copy of the page so that
289 * we can zero the remainder - and not zero chunks of the
290 * real file in the page cache. */
291 mm_flags &= ~MAP_SHARED;
292 mm_flags |= MAP_PRIVATE | MAP_POPULATE;
294 /* Map the final partial page. */
295 uintptr_t last_page = memstart + filesz - partial;
296 if (do_mmap(p, last_page, PGSIZE, mm_perms, mm_flags,
297 f, filestart + filesz - partial) == MAP_FAILED) {
298 printk("[kernel] load_one_elf: partial mmap failed\n");
302 /* Zero the end of it. This is a huge pain in the ass. The
303 * filesystems should zero out the last bits of a page if
304 * the file doesn't fill the last page. But we're dealing
305 * with windows into otherwise complete files. */
306 pte_t pte = pgdir_walk(p->env_pgdir, (void*)last_page, 0);
307 /* if we were able to get a PTE, then there is a real page
308 * backing the VMR, and we need to zero the excess. if
309 * there isn't, then the page fault code should handle it.
310 * since we set populate above, we should have a PTE, except
311 * in cases where the offset + len window exceeded the file
312 * size. in this case, we let them mmap it, but didn't
313 * populate it. there will be a PF right away if someone
314 * tries to use this. check out do_mmap for more info. */
315 if (pte_walk_okay(pte)) {
316 void* last_page_kva = KADDR(pte_get_paddr(pte));
317 memset(last_page_kva + partial, 0, PGSIZE - partial);
/* Account for the separately-mapped partial page. */
320 filesz = ROUNDUP(filesz, PGSIZE);
323 /* Any extra pages are mapped anonymously... (a bit weird) */
/* The remainder of memsz beyond the file-backed portion is BSS. */
325 if (do_mmap(p, memstart + filesz, memsz-filesz,
326 PROT_READ | PROT_WRITE, MAP_PRIVATE,
327 NULL, 0) == MAP_FAILED) {
328 printk("[kernel] load_one_elf: anon mmap failed\n");
333 /* map in program headers anyway if not present in binary.
334 * useful for TLS in static programs. */
/* ei->phdr == -1 means no PT_PHDR segment was seen; presumably initialized
 * on an elided line -- TODO confirm. */
335 if (ei->phdr == -1) {
336 uintptr_t filestart = ROUNDDOWN(e_phoff, PGSIZE);
337 uintptr_t filesz = e_phoff + (e_phnum * phsz) - filestart;
/* addr 0 => kernel picks the VA; record the chosen phdr user address. */
338 void *phdr_addr = do_mmap(p, 0, filesz, PROT_READ | PROT_WRITE,
339 MAP_PRIVATE, f, filestart);
340 if (phdr_addr == MAP_FAILED) {
341 printk("[kernel] load_one_elf: prog header mmap failed\n");
344 ei->phdr = (long)phdr_addr + e_phoff;
/* Entry point, slid by the same page offset as the segments. */
346 ei->entry = elf_field(elfhdr, e_entry) + pg_num * PGSIZE;
/* Common exit: restore previous task context (cleanup of phdrs is
 * presumably on elided lines). */
354 switch_back_from_ktask(old_ret);
/* Top-level ELF exec path: load the main binary, load its dynamic
 * interpreter (if any) at a fixed VA, build auxv, populate the user stack,
 * and initialize p's SCP context to start at the right entry point.
 * Returns 0 on success; error paths are on elided lines. */
358 int load_elf(struct proc* p, struct file* f,
359 int argc, char *argv[], int envc, char *envp[])
361 elf_info_t ei, interp_ei;
/* pg_num 0: the main binary maps at its linked addresses. */
362 if (load_one_elf(p, f, 0, &ei, FALSE))
/* ei.interp was filled in from PT_INTERP (dynamic binaries only; the
 * ei.dynamic check is presumably on an elided line). */
366 struct file *interp = do_file_open(ei.interp, O_READ, 0);
369 /* Load dynamic linker at 1M. Obvious MIB joke avoided.
370 * It used to be loaded at page 1, but the existence of valid addresses
371 * that low masked bad derefs through NULL pointer structs. This in turn
372 * helped us waste a full day debugging a bug in the Go runtime. True!
373 * Note that MMAP_LOWEST_VA also has this value but we want to make this
/* writable=TRUE here -- the ld page-fault workaround described above
 * load_one_elf. */
375 int error = load_one_elf(p, interp, MMAP_LD_FIXED_VA >> PGSHIFT,
377 kref_put(&interp->f_kref);
382 /* Set up the auxiliary info for dynamic linker/runtime */
383 elf_aux_t auxv[] = {{ELF_AUX_PHDR, ei.phdr},
/* NOTE(review): AT_PHENT is hard-coded to the 32-bit phdr size even though
 * load_one_elf handles 64-bit ELFs -- verify against the full source
 * whether userspace is 32-bit-only here. */
384 {ELF_AUX_PHENT, sizeof(proghdr32_t)},
385 {ELF_AUX_PHNUM, ei.phnum},
386 {ELF_AUX_ENTRY, ei.entry}};
387 int auxc = sizeof(auxv)/sizeof(auxv[0]);
389 /* Populate the stack with the required info. */
390 uintptr_t stack_top = populate_stack(p, argc, argv, envc, envp, auxc, auxv);
394 /* Initialize the process as an SCP. */
/* Dynamic binaries start in the interpreter; static ones at their own
 * entry. */
395 uintptr_t core0_entry = ei.dynamic ? interp_ei.entry : ei.entry;
396 proc_init_ctx(&p->scp_ctx, 0, core0_entry, stack_top, 0);
398 p->procinfo->program_end = ei.highest_addr;
/* args_base points at the argc slot; get_startup_argc/argv read it back. */
399 p->args_base = (void *) stack_top;
/* Read argc back out of p's startup stack.  p->args_base points at the
 * size_t argc slot laid down by populate_stack.  The return statement is on
 * elided lines -- presumably argc, or negative on copy failure; TODO
 * confirm. */
404 ssize_t get_startup_argc(struct proc *p)
406 const char *sptr = (const char *) p->args_base;
409 /* TODO,DL: Use copy_from_user() when available.
/* argc was stored as a size_t (see populate_stack); 'argc' is declared on
 * an elided line. */
411 if (memcpy_from_user(p, &argc, sptr, sizeof(size_t)))
/* Copy startup argv[idx] from p's user stack into the caller's buffer argp
 * (capacity presumably given by the elided max_size parameter -- TODO
 * confirm).  Returns argp on success, per the visible flow; the return and
 * error paths are on elided lines. */
417 char *get_startup_argv(struct proc *p, size_t idx, char *argp,
/* Bound all reads to the region between args_base and the stack top. */
420 size_t stack_space = (const char *) USTACKTOP - (const char *) p->args_base;
/* argv[] starts right after the size_t argc slot (populate_stack layout). */
421 const char *sptr = (const char *) p->args_base + sizeof(size_t) +
422 idx * sizeof(char *);
423 const char *argv = NULL;
425 /* TODO,DL: Use copy_from_user() when available.
/* First fetch the user-space pointer argv[idx]... */
427 if (memcpy_from_user(p, &argv, sptr, sizeof(char *)))
430 /* TODO,DL: Use strncpy_from_user() when available.
/* ...then copy the string it points to, clamped to the stack region. */
432 max_size = MIN(max_size, stack_space);
433 if (memcpy_from_user(p, argp, argv, max_size))
/* Force NUL termination.  NOTE(review): underflows if max_size could be 0
 * after clamping -- verify the caller guarantees max_size > 0. */
435 argp[max_size - 1] = 0;