1 /* See COPYRIGHT for copyright information. */
9 #include <arch/bitmask.h>
25 #include <ros/syscall.h>
26 #include <ros/error.h>
28 env_t *envs = NULL; // All environments
30 // TODO: make this a struct of info including the pointer and cacheline-align it
31 // This lets the kernel know what process is running on the core it traps into.
32 // A lot of the Env business, including this and its usage, will change when we
33 // redesign the env as a multi-process.
34 env_t* (RO curenvs)[MAX_NUM_CPUS] = {[0 ... (MAX_NUM_CPUS-1)] NULL};
36 #define ENVGENSHIFT 12 // >= LOGNENV
39 // Converts an envid to an env pointer.
42 // 0 on success, -EBADENV on error.
43 // On success, sets *env_store to the environment.
44 // On error, sets *env_store to NULL.
47 envid2env(envid_t envid, env_t **env_store, bool checkperm)
51 // If envid is zero, return the current environment.
57 // Look up the Env structure via the index part of the envid,
58 // then check the env_id field in that env_t
59 // to ensure that the envid is not stale
60 // (i.e., does not refer to a _previous_ environment
61 // that used the same slot in the envs[] array).
62 e = &envs[ENVX(envid)];
63 if (e->state == ENV_FREE || e->env_id != envid) {
68 // Check that the calling environment has legitimate permission
69 // to manipulate the specified environment.
70 // If checkperm is set, the specified environment
71 // must be either the current environment
72 // or an immediate child of the current environment.
73 // TODO: should check for current being null
74 if (checkperm && e != current && e->env_parent_id != current->env_id) {
84 // Mark all environments in 'envs' as free, set their env_ids to 0,
85 // and insert them into the proc_freelist.
86 // Insert in reverse order, so that the first call to env_alloc()
88 // TODO: get rid of this whole array bullshit
96 // core 0 is not idle, all others are (for now)
97 spin_lock(&idle_lock);
98 num_idlecores = num_cpus - 1;
99 for (i = 0; i < num_idlecores; i++)
100 idlecoremap[i] = i + 1;
101 spin_unlock(&idle_lock);
102 atomic_init(&num_envs, 0);
103 TAILQ_INIT(&proc_freelist);
104 assert(envs != NULL);
105 for (i = NENV-1; i >= 0; i--) {
106 // these should already be set from when i memset'd the array to 0
107 envs[i].state = ENV_FREE;
108 envs[i].end_text_segment = (void*)UTEXT;
109 envs[i].end_data_segment = (void*)UTEXT;
111 TAILQ_INSERT_HEAD(&proc_freelist, &envs[i], proc_link);
116 // Initialize the kernel virtual memory layout for environment e.
117 // Allocate a page directory, set e->env_pgdir and e->env_cr3 accordingly,
118 // and initialize the kernel portion of the new environment's address space.
119 // Do NOT (yet) map anything into the user portion
120 // of the environment's virtual address space.
122 // Returns 0 on success, < 0 on error. Errors include:
123 // -ENOMEM if page directory or table could not be allocated.
126 env_setup_vm(env_t *e)
127 WRITES(e->env_pgdir, e->env_cr3, e->env_procinfo, e->env_procdata)
130 page_t *pgdir = NULL;
131 page_t *pginfo[PROCINFO_NUM_PAGES] = {NULL};
132 page_t *pgdata[PROCDATA_NUM_PAGES] = {NULL};
133 static page_t * RO shared_page = 0;
136 * First, allocate a page for the pgdir of this process and up
137 * its reference count since this will never be done elsewhere
139 r = page_alloc(&pgdir);
144 * Next, set up the e->env_pgdir and e->env_cr3 pointers to point
145 * to this newly allocated page and clear its contents
147 memset(page2kva(pgdir), 0, PGSIZE);
148 e->env_pgdir = (pde_t *COUNT(NPDENTRIES)) TC(page2kva(pgdir));
149 e->env_cr3 = (physaddr_t) TC(page2pa(pgdir));
152 * Now start filling in the pgdir with mappings required by all newly
153 * created address spaces
156 // Map in the kernel to the top of every address space
157 // should be able to do this so long as boot_pgdir never has
158 // anything put below UTOP
159 // TODO check on this! had a nasty bug because of it
160 // this is a bit wonky, since if it's not PGSIZE, lots of other things are
162 memcpy(e->env_pgdir, boot_pgdir, NPDENTRIES*sizeof(pde_t));
164 // VPT and UVPT map the env's own page table, with
165 // different permissions.
166 e->env_pgdir[PDX(VPT)] = PTE(LA2PPN(e->env_cr3), PTE_P | PTE_KERN_RW);
167 e->env_pgdir[PDX(UVPT)] = PTE(LA2PPN(e->env_cr3), PTE_P | PTE_USER_RO);
170 * Now allocate and insert all pages required for the shared
171 * procinfo structure into the page table
173 for(int i=0; i<PROCINFO_NUM_PAGES; i++) {
174 if(page_alloc(&pginfo[i]) < 0)
175 goto env_setup_vm_error;
176 if(page_insert(e->env_pgdir, pginfo[i], (void*SNT)(UINFO + i*PGSIZE),
178 goto env_setup_vm_error;
182 * Now allocate and insert all pages required for the shared
183 * procdata structure into the page table
185 for(int i=0; i<PROCDATA_NUM_PAGES; i++) {
186 if(page_alloc(&pgdata[i]) < 0)
187 goto env_setup_vm_error;
188 if(page_insert(e->env_pgdir, pgdata[i], (void*SNT)(UDATA + i*PGSIZE),
190 goto env_setup_vm_error;
194 * Now, set e->env_procinfo, and e->env_procdata to point to
195 * the proper pages just allocated and clear them out.
197 e->env_procinfo = (procinfo_t *SAFE) TC(page2kva(pginfo[0]));
198 e->env_procdata = (procdata_t *SAFE) TC(page2kva(pgdata[0]));
200 memset(e->env_procinfo, 0, sizeof(procinfo_t));
201 memset(e->env_procdata, 0, sizeof(procdata_t));
203 /* Finally, set up the Global Shared Data page for all processes.
204 * Can't be trusted, but still very useful at this stage for us.
205 * Consider removing when we have real processes.
206 * (TODO). Note the page is alloced only the first time through
209 if(page_alloc(&shared_page) < 0)
210 goto env_setup_vm_error;
211 // Up it, so it never goes away. One per user, plus one from page_alloc
212 // This is necessary, since it's in the per-process range of memory that
213 // gets freed during page_free.
214 page_incref(shared_page);
217 // Inserted into every process's address space at UGDATA
218 if(page_insert(e->env_pgdir, shared_page, (void*SNT)UGDATA, PTE_USER_RW) < 0)
219 goto env_setup_vm_error;
224 page_free(shared_page);
225 for(int i=0; i< PROCDATA_NUM_PAGES; i++) {
226 page_free(pgdata[i]);
228 for(int i=0; i< PROCINFO_NUM_PAGES; i++) {
229 page_free(pginfo[i]);
231 env_user_mem_free(e);
237 proc_init_procinfo(struct proc* p)
239 p->env_procinfo->id = (p->env_id & 0x3FF);
241 // TODO: maybe do something smarter here
242 p->env_procinfo->max_harts = num_cpus-1;
245 // Sets up argc/argv in procinfo. Returns number of
246 // args successfully imported (because of size restrictions).
247 // The procinfo pages must have been mapped into the user's
248 // address space before this function can be called.
250 proc_init_argc_argv_v(struct proc* p, size_t nargs, va_list args)
252 // TODO: right now we assume procinfo can be directly addressed
253 // by the kernel (i.e. it's contiguous).
254 static_assert(sizeof(struct procinfo) <= PGSIZE);
256 if(nargs > PROCINFO_MAX_ARGC)
257 nargs = PROCINFO_MAX_ARGC;
259 char* argv[PROCINFO_MAX_ARGC] = {0};
260 static_assert(sizeof(argv) == sizeof(p->env_procinfo->argv));
262 size_t size = 0, argc;
263 for(argc = 0; argc < nargs; argc++)
265 const char* arg = va_arg(args,const char*);
266 size_t len = strnlen(arg,PROCINFO_MAX_ARGV_SIZE);
267 if(size+len+1 > PROCINFO_MAX_ARGV_SIZE)
269 memcpy(&p->env_procinfo->argv_buf[size],arg,len+1);
270 argv[argc] = (char*)(UINFO+offsetof(struct procinfo,argv_buf)+size);
274 p->env_procinfo->argc = argc;
275 memcpy(p->env_procinfo->argv,argv,sizeof(argv));
281 proc_init_argc_argv(struct proc* p, size_t nargs, ...)
286 va_start(list,nargs);
288 ret = proc_init_argc_argv_v(p,nargs,list);
296 // Allocates and initializes a new environment.
297 // On success, the new environment is stored in *newenv_store.
299 // Returns 0 on success, < 0 on failure. Errors include:
300 // -ENOFREEENV if all NENV environments are allocated
301 // -ENOMEM on memory exhaustion
304 env_alloc(env_t **newenv_store, envid_t parent_id)
310 spin_lock(&freelist_lock);
311 e = TAILQ_FIRST(&proc_freelist);
313 TAILQ_REMOVE(&proc_freelist, e, proc_link);
314 spin_unlock(&freelist_lock);
316 spin_unlock(&freelist_lock);
322 // Allocate and set up the page directory for this environment.
323 if ((r = env_setup_vm(e)) < 0) {
324 spin_lock(&freelist_lock);
325 TAILQ_INSERT_HEAD(&proc_freelist, e, proc_link);
326 spin_unlock(&freelist_lock);
330 // Generate an env_id for this environment.
331 generation = (e->env_id + (1 << ENVGENSHIFT)) & ~(NENV - 1);
332 if (generation <= 0) // Don't create a negative env_id.
333 generation = 1 << ENVGENSHIFT;
334 e->env_id = generation | (e - envs);
336 // Set the basic status variables.
338 e->env_parent_id = parent_id;
339 proc_set_state(e, PROC_CREATED);
343 e->env_entry = 0; // cheating. this really gets set in load_icode
345 for (int i = 0; i < MAX_NUM_CPUS; i++)
347 e->cache_colors_map = kmalloc(llc_cache->num_colors, 0);
348 CLR_BITMASK(e->cache_colors_map, llc_cache->num_colors);
349 memset(&e->resources, 0, sizeof(e->resources));
351 memset(&e->env_ancillary_state, 0, sizeof(e->env_ancillary_state));
352 memset(&e->env_tf, 0, sizeof(e->env_tf));
353 proc_init_trapframe(&e->env_tf);
355 proc_init_procinfo(e);
358 * Initialize the contents of the e->env_procdata structure
360 // Initialize the generic syscall ring buffer
361 SHARED_RING_INIT(&e->env_procdata->syscallring);
362 // Initialize the backend of the syscall ring buffer
363 BACK_RING_INIT(&e->syscallbackring,
364 &e->env_procdata->syscallring,
367 // Initialize the generic sysevent ring buffer
368 SHARED_RING_INIT(&e->env_procdata->syseventring);
369 // Initialize the frontend of the sysevent ring buffer
370 FRONT_RING_INIT(&e->syseventfrontring,
371 &e->env_procdata->syseventring,
375 atomic_inc(&num_envs);
377 printk("[%08x] new env %08x\n", current ? current->env_id : 0, e->env_id);
383 // Allocate len bytes of physical memory for environment env,
384 // and map it at virtual address va in the environment's address space.
385 // Does not zero or otherwise initialize the mapped pages in any way.
386 // Pages should be writable by user and kernel.
387 // Panic if any allocation attempt fails.
390 env_segment_alloc(env_t *e, void *SNT va, size_t len)
392 void *SNT start, *SNT end;
398 start = ROUNDDOWN(va, PGSIZE);
399 end = ROUNDUP(va + len, PGSIZE);
401 panic("Wrap-around in memory allocation addresses!");
402 if ((uintptr_t)end > UTOP)
403 panic("Attempting to map above UTOP!");
404 // page_insert/pgdir_walk alloc a page and read/write to it via its address
405 // starting from pgdir (e's), so we need to be using e's pgdir
406 assert(e->env_cr3 == rcr3());
407 num_pages = LA2PPN(end - start);
409 for (i = 0; i < num_pages; i++, start += PGSIZE) {
410 // skip if a page is already mapped. yes, page_insert will page_remove
411 // whatever page was already there, but if we are seg allocing adjacent
412 // regions, we don't want to destroy that old mapping/page
413 // though later on we are told we can ignore this...
414 pte = pgdir_walk(e->env_pgdir, start, 0);
415 if (pte && *pte & PTE_P)
417 if ((r = page_alloc(&page)) < 0)
418 panic("env_segment_alloc: %e", r);
419 page_insert(e->env_pgdir, page, start, PTE_USER_RW);
424 env_segment_free(env_t *e, void *SNT va, size_t len)
426 void *SNT start, *SNT end;
431 // Round start up rather than down, so we don't free the page that va is actually on
432 start = ROUNDUP(va, PGSIZE);
433 end = ROUNDUP(va + len, PGSIZE);
435 panic("Wrap-around in memory free addresses!");
436 if ((uintptr_t)end > UTOP)
437 panic("Attempting to unmap above UTOP!");
438 // pgdir_walk/page_remove read/write the page tables via their addresses
439 // starting from pgdir (e's), so we need to be using e's pgdir
440 assert(e->env_cr3 == rcr3());
441 num_pages = LA2PPN(end - start);
443 for (int i = 0; i < num_pages; i++, start += PGSIZE) {
444 // skip if a page is already unmapped.
445 pte = pgdir_walk(e->env_pgdir, start, 0);
446 if (pte && *pte & PTE_P)
447 page_remove(e->env_pgdir,start);
452 // Set up the initial program binary, stack, and processor flags
453 // for a user process.
455 // This function loads all loadable segments from the ELF binary image
456 // into the environment's user memory, starting at the appropriate
457 // virtual addresses indicated in the ELF program header.
458 // At the same time it clears to zero any portions of these segments
459 // that are marked in the program header as being mapped
460 // but not actually present in the ELF file - i.e., the program's bss section.
462 // Finally, this function maps one page for the program's initial stack.
464 load_icode(env_t *SAFE e, uint8_t *COUNT(size) binary, size_t size)
466 // asw: copy the headers because they might not be aligned.
470 memcpy(&elfhdr, binary, sizeof(elfhdr));
475 assert(elfhdr.e_magic == ELF_MAGIC);
476 // make sure we have proghdrs to load
477 assert(elfhdr.e_phnum);
479 // to actually access any pages alloc'd for this environment, we
480 // need to have the hardware use this environment's page tables.
481 uintreg_t old_cr3 = rcr3();
483 * Even though we'll decref later and no one should be killing us at this
484 * stage, we're still going to wrap the lcr3s with incref/decref.
486 * Note we never decref on the old_cr3, since we aren't willing to let it
487 * die. It's also not clear who the previous process is - sometimes it
488 * isn't even a process (when the kernel loads on its own, and not in
489 * response to a syscall). Probably need to think more about this (TODO)
491 * This can get a bit tricky if this code blocks (will need to think about a
492 * decref then), if we try to change states, etc.
497 // TODO: how do we do a runtime COUNT?
498 {TRUSTEDBLOCK // zra: TRUSTEDBLOCK until validation is done.
499 for (i = 0; i < elfhdr.e_phnum; i++) {
500 memcpy(&phdr, binary + elfhdr.e_phoff + i*sizeof(phdr), sizeof(phdr));
501 if (phdr.p_type != ELF_PROG_LOAD)
503 // TODO: validate elf header fields!
504 // seg alloc creates PTE_U|PTE_W pages. if you ever want to change
505 // this, there will be issues with overlapping sections
506 _end = MAX(_end, (void*)(phdr.p_va + phdr.p_memsz));
507 env_segment_alloc(e, (void*SNT)phdr.p_va, phdr.p_memsz);
508 memcpy((void*)phdr.p_va, binary + phdr.p_offset, phdr.p_filesz);
509 memset((void*)phdr.p_va + phdr.p_filesz, 0,
510 phdr.p_memsz - phdr.p_filesz);
513 proc_set_program_counter(&e->env_tf, elfhdr.e_entry);
514 e->env_entry = elfhdr.e_entry;
516 // Now map USTACK_NUM_PAGES pages for the program's initial stack
517 // starting at virtual address USTACKTOP - USTACK_NUM_PAGES*PGSIZE.
518 env_segment_alloc(e, (void*SNT)(USTACKTOP - USTACK_NUM_PAGES*PGSIZE),
519 USTACK_NUM_PAGES*PGSIZE);
521 // reload the original address space
529 // Allocates a new env and loads the named elf binary into it.
531 env_t* env_create(uint8_t *binary, size_t size)
537 curid = (current ? current->env_id : 0);
538 if ((r = env_alloc(&e, curid)) < 0)
539 panic("env_create: %e", r);
541 /* Load the binary and set the current locations of the elf segments.
542 * All end-of-segment pointers are page aligned (invariant) */
543 e->end_text_segment = load_icode(e, binary, size);
544 e->end_data_segment = e->end_text_segment;
550 // Frees env e and all memory it uses.
557 // Note the environment's demise.
558 printk("[%08x] free env %08x\n", current ? current->env_id : 0, e->env_id);
559 // All parts of the kernel should have decref'd before env_free was called.
560 assert(e->env_refcnt == 0);
562 // Flush all mapped pages in the user portion of the address space
563 env_user_mem_free(e);
565 // free the page directory
569 page_decref(pa2page(pa));
571 //Free any memory allocated by this process
572 kfree(e->cache_colors_map);
574 // return the environment to the free list
576 spin_lock(&freelist_lock);
577 TAILQ_INSERT_HEAD(&proc_freelist, e, proc_link);
578 spin_unlock(&freelist_lock);
582 #define PER_CPU_THING(type,name)\
583 type SLOCKED(name##_lock) * RWPROTECT name;\
584 type SLOCKED(name##_lock) *\
585 (get_per_cpu_##name)()\
587 { R_PERMITTED(global(name))\
588 return &name[core_id()];\
593 /* This is the top-half of an interrupt handler, where the bottom half is
594 * proc_run (which never returns). Just add it to the delayed work queue,
595 * which (incidentally) can only hold one item at this point.
597 * Note this is rather old, and meant to run a RUNNABLE_S on a worker core.
600 void run_env_handler(trapframe_t *tf, env_t * data)
602 void run_env_handler(trapframe_t *tf, void * data)
606 struct work TP(env_t *) job;
607 struct workqueue TP(env_t *) *CT(1) workqueue =
608 TC(&per_cpu_info[core_id()].workqueue);
609 // this doesn't work, and making it a TP(env_t) is wrong
610 // zra: When you want to use other types, let me know, and I can help
611 // make something that Ivy is happy with.
615 job.func = (func_t)proc_run;
618 if (enqueue_work(workqueue, &job))
619 panic("Failed to enqueue work!");