1 /* See COPYRIGHT for copyright information. */
9 #include <arch/bitmask.h>
25 #include <ros/syscall.h>
26 #include <ros/error.h>
// Global table of all environments (allocated/initialized elsewhere;
// env_init() below asserts it is non-NULL before use).
env_t *envs = NULL; // All environments

// TODO: make this a struct of info including the pointer and cacheline-align it
// This lets the kernel know what process is running on the core it traps into.
// A lot of the Env business, including this and its usage, will change when we
// redesign the env as a multi-process.
// Per-core "currently running env" pointers, one slot per possible CPU,
// all NULL at boot.  RO is an Ivy annotation on the array object.
env_t* (RO curenvs)[MAX_NUM_CPUS] = {[0 ... (MAX_NUM_CPUS-1)] NULL};

// Shift used by env_alloc() when building an env_id: the generation lives in
// the bits above the slot index, so this must be >= LOGNENV to keep the two
// fields disjoint (see the env_id construction in env_alloc()).
#define ENVGENSHIFT 12 // >= LOGNENV
// Converts an envid to an env pointer.
//
// RETURNS
//   0 on success, -EBADENV on error.
//   On success, sets *env_store to the environment.
//   On error, sets *env_store to NULL.
envid2env(envid_t envid, env_t **env_store, bool checkperm)
	// If envid is zero, return the current environment.

	// Look up the Env structure via the index part of the envid,
	// then check the env_id field in that env_t
	// to ensure that the envid is not stale
	// (i.e., does not refer to a _previous_ environment
	// that used the same slot in the envs[] array).
	e = &envs[ENVX(envid)];
	if (e->state == ENV_FREE || e->env_id != envid) {

	// Check that the calling environment has legitimate permission
	// to manipulate the specified environment.
	// If checkperm is set, the specified environment
	// must be either the current environment
	// or an immediate child of the current environment.
	// TODO: should check for current being null
	// NOTE(review): the TODO is real -- 'current' is dereferenced below
	// whenever checkperm is set, so a NULL current would fault here.
	if (checkperm && e != current && e->env_parent_id != current->env_id) {
// Mark all environments in 'envs' as free, set their env_ids to 0,
// and insert them into the proc_freelist.
// Insert in reverse order, so that the first call to env_alloc()
// (presumably returns envs[0] first -- TODO confirm against env_alloc)
// TODO: get rid of this whole array bullshit
	// core 0 is not idle, all others are (for now)
	spin_lock(&idle_lock);
	num_idlecores = num_cpus - 1;
	for (i = 0; i < num_idlecores; i++)
		idlecoremap[i] = i + 1;
	spin_unlock(&idle_lock);
	// No envs exist yet; the freelist starts empty and is populated below.
	atomic_init(&num_envs, 0);
	TAILQ_INIT(&proc_freelist);
	assert(envs != NULL);
	// Walk backwards so HEAD-insertion leaves the list ordered
	// envs[0] ... envs[NENV-1].
	for (i = NENV-1; i >= 0; i--) {
		// these should already be set from when i memset'd the array to 0
		envs[i].state = ENV_FREE;
		// Both segment-end markers start at UTEXT; load_icode/env_create
		// update them once a binary is loaded.
		envs[i].end_text_segment = (void*)UTEXT;
		envs[i].end_data_segment = (void*)UTEXT;
		TAILQ_INSERT_HEAD(&proc_freelist, &envs[i], proc_link);
// Initialize the kernel virtual memory layout for environment e.
// Allocate a page directory, set e->env_pgdir and e->env_cr3 accordingly,
// and initialize the kernel portion of the new environment's address space.
// Do NOT (yet) map anything into the user portion
// of the environment's virtual address space.
//
// Returns 0 on success, < 0 on error.  Errors include:
//   -ENOMEM if page directory or table could not be allocated.
env_setup_vm(env_t *e)
WRITES(e->env_pgdir, e->env_cr3, e->env_procinfo, e->env_procdata)
	page_t *pgdir = NULL;
	page_t *pginfo[PROCINFO_NUM_PAGES] = {NULL};
	page_t *pgdata[PROCDATA_NUM_PAGES] = {NULL};
	// One global page shared by every process; allocated only on the first
	// call (static), and incref'd below so freeing an env never releases it.
	static page_t * RO shared_page = 0;

	/*
	 * First, allocate a page for the pgdir of this process and up
	 * its reference count since this will never be done elsewhere
	 */
	r = page_alloc(&pgdir);

	/*
	 * Next, set up the e->env_pgdir and e->env_cr3 pointers to point
	 * to this newly allocated page and clear its contents
	 */
	memset(page2kva(pgdir), 0, PGSIZE);
	e->env_pgdir = (pde_t *COUNT(NPDENTRIES)) TC(page2kva(pgdir));
	e->env_cr3 = (physaddr_t) TC(page2pa(pgdir));

	/*
	 * Now start filling in the pgdir with mappings required by all newly
	 * created address spaces
	 */

	// Map in the kernel to the top of every address space
	// should be able to do this so long as boot_pgdir never has
	// anything put below UTOP
	// TODO check on this! had a nasty bug because of it
	// this is a bit wonky, since if it's not PGSIZE, lots of other things are
	memcpy(e->env_pgdir, boot_pgdir, NPDENTRIES*sizeof(pde_t));

	// VPT and UVPT map the env's own page table, with
	// different permissions.
	e->env_pgdir[PDX(VPT)] = PTE(LA2PPN(e->env_cr3), PTE_P | PTE_KERN_RW);
	e->env_pgdir[PDX(UVPT)] = PTE(LA2PPN(e->env_cr3), PTE_P | PTE_USER_RO);

	/*
	 * Now allocate and insert all pages required for the shared
	 * procinfo structure into the page table
	 */
	for(int i=0; i<PROCINFO_NUM_PAGES; i++) {
		if(page_alloc(&pginfo[i]) < 0)
			goto env_setup_vm_error;
		if(page_insert(e->env_pgdir, pginfo[i], (void*SNT)(UINFO + i*PGSIZE),
			goto env_setup_vm_error;

	/*
	 * Now allocate and insert all pages required for the shared
	 * procdata structure into the page table
	 */
	for(int i=0; i<PROCDATA_NUM_PAGES; i++) {
		if(page_alloc(&pgdata[i]) < 0)
			goto env_setup_vm_error;
		if(page_insert(e->env_pgdir, pgdata[i], (void*SNT)(UDATA + i*PGSIZE),
			goto env_setup_vm_error;

	/*
	 * Now, set e->env_procinfo, and e->env_procdata to point to
	 * the proper pages just allocated and clear them out.
	 */
	// Kernel-side views of the first procinfo/procdata pages.
	e->env_procinfo = (procinfo_t *SAFE) TC(page2kva(pginfo[0]));
	e->env_procdata = (procdata_t *SAFE) TC(page2kva(pgdata[0]));

	memset(e->env_procinfo, 0, sizeof(procinfo_t));
	memset(e->env_procdata, 0, sizeof(procdata_t));

	/* Finally, set up the Global Shared Data page for all processes.
	 * Can't be trusted, but still very useful at this stage for us.
	 * Consider removing when we have real processes.
	 * (TODO). Note the page is alloced only the first time through
	 */
	if(page_alloc(&shared_page) < 0)
		goto env_setup_vm_error;
	// Up it, so it never goes away. One per user, plus one from page_alloc
	// This is necessary, since it's in the per-process range of memory that
	// gets freed during page_free.
	page_incref(shared_page);

	// Inserted into every process's address space at UGDATA
	if(page_insert(e->env_pgdir, shared_page, (void*SNT)UGDATA, PTE_USER_RW) < 0)
		goto env_setup_vm_error;

	// Error path: undo everything allocated above.  page_free on a NULL
	// slot of pgdata/pginfo relies on the arrays being NULL-initialized.
	page_free(shared_page);
	for(int i=0; i< PROCDATA_NUM_PAGES; i++) {
		page_free(pgdata[i]);
	for(int i=0; i< PROCINFO_NUM_PAGES; i++) {
		page_free(pginfo[i]);
	// Also tear down any user mappings already inserted into the pgdir.
	env_user_mem_free(e);
// Fill in the user-visible procinfo fields for a freshly allocated proc.
proc_init_procinfo(struct proc* p)
	// The advertised id is the env_id masked to its low 10 bits.
	p->env_procinfo->id = (p->env_id & 0x3FF);

	// TODO: maybe do something smarter here
	// (currently: every core except core 0 is available as a hart)
	p->env_procinfo->max_harts = num_cpus-1;
// Allocates and initializes a new environment.
// On success, the new environment is stored in *newenv_store.
//
// Returns 0 on success, < 0 on failure.  Errors include:
//   -ENOFREEENV if all NENVS environments are allocated
//   -ENOMEM on memory exhaustion
env_alloc(env_t **newenv_store, envid_t parent_id)
	// Pop a free slot off the freelist under the lock.
	spin_lock(&freelist_lock);
	e = TAILQ_FIRST(&proc_freelist);
	TAILQ_REMOVE(&proc_freelist, e, proc_link);
	spin_unlock(&freelist_lock);
	spin_unlock(&freelist_lock);

	// Allocate and set up the page directory for this environment.
	if ((r = env_setup_vm(e)) < 0) {
		// Setup failed: return the slot to the freelist before bailing.
		spin_lock(&freelist_lock);
		TAILQ_INSERT_HEAD(&proc_freelist, e, proc_link);
		spin_unlock(&freelist_lock);

	// Generate an env_id for this environment.
	// The generation lives above the index bits (see ENVGENSHIFT); bumping
	// it each time a slot is reused makes stale envids detectable.
	generation = (e->env_id + (1 << ENVGENSHIFT)) & ~(NENV - 1);
	if (generation <= 0)	// Don't create a negative env_id.
		generation = 1 << ENVGENSHIFT;
	e->env_id = generation | (e - envs);

	// Set the basic status variables.
	e->env_parent_id = parent_id;
	proc_set_state(e, PROC_CREATED);

	e->env_entry = 0; // cheating. this really gets set in load_icode

	for (int i = 0; i < MAX_NUM_CPUS; i++)
	// Cache-coloring bookkeeping: allocate and clear this env's color map.
	e->cache_colors_map = kmalloc(llc_cache->num_colors, 0);
	CLR_BITMASK(e->cache_colors_map, llc_cache->num_colors);
	memset(&e->resources, 0, sizeof(e->resources));

	memset(&e->env_ancillary_state, 0, sizeof(e->env_ancillary_state));
	memset(&e->env_tf, 0, sizeof(e->env_tf));
	proc_init_trapframe(&e->env_tf);

	proc_init_procinfo(e);

	/*
	 * Initialize the contents of the e->env_procdata structure
	 */
	// Initialize the generic syscall ring buffer
	SHARED_RING_INIT(&e->env_procdata->syscallring);
	// Initialize the backend of the syscall ring buffer
	BACK_RING_INIT(&e->syscallbackring,
	               &e->env_procdata->syscallring,

	// Initialize the generic sysevent ring buffer
	SHARED_RING_INIT(&e->env_procdata->syseventring);
	// Initialize the frontend of the sysevent ring buffer
	FRONT_RING_INIT(&e->syseventfrontring,
	                &e->env_procdata->syseventring,

	atomic_inc(&num_envs);

	printk("[%08x] new env %08x\n", current ? current->env_id : 0, e->env_id);
// Allocate len bytes of physical memory for environment env,
// and map it at virtual address va in the environment's address space.
// Does not zero or otherwise initialize the mapped pages in any way.
// Pages should be writable by user and kernel.
// Panic if any allocation attempt fails.
env_segment_alloc(env_t *e, void *SNT va, size_t len)
	void *SNT start, *SNT end;

	// Cover every page the byte range [va, va+len) touches.
	start = ROUNDDOWN(va, PGSIZE);
	end = ROUNDUP(va + len, PGSIZE);
		panic("Wrap-around in memory allocation addresses!");
	if ((uintptr_t)end > UTOP)
		panic("Attempting to map above UTOP!");
	// page_insert/pgdir_walk alloc a page and read/write to it via its address
	// starting from pgdir (e's), so we need to be using e's pgdir
	assert(e->env_cr3 == rcr3());
	num_pages = LA2PPN(end - start);

	for (i = 0; i < num_pages; i++, start += PGSIZE) {
		// skip if a page is already mapped. yes, page_insert will page_remove
		// whatever page was already there, but if we are seg allocing adjacent
		// regions, we don't want to destroy that old mapping/page
		// though later on we are told we can ignore this...
		pte = pgdir_walk(e->env_pgdir, start, 0);
		if (pte && *pte & PTE_P)
		if ((r = page_alloc(&page)) < 0)
			panic("env_segment_alloc: %e", r);
		page_insert(e->env_pgdir, page, start, PTE_USER_RW);
// Unmap (and free, via page_remove) the pages fully contained in
// [va, va+len) in e's address space.  Mirror of env_segment_alloc.
env_segment_free(env_t *e, void *SNT va, size_t len)
	void *SNT start, *SNT end;

	// Round this up this time so we don't free the page that va is actually on
	start = ROUNDUP(va, PGSIZE);
	end = ROUNDUP(va + len, PGSIZE);
		panic("Wrap-around in memory free addresses!");
	if ((uintptr_t)end > UTOP)
		panic("Attempting to unmap above UTOP!");
	// page_insert/pgdir_walk alloc a page and read/write to it via its address
	// starting from pgdir (e's), so we need to be using e's pgdir
	assert(e->env_cr3 == rcr3());
	num_pages = LA2PPN(end - start);

	for (int i = 0; i < num_pages; i++, start += PGSIZE) {
		// skip if a page is already unmapped.
		pte = pgdir_walk(e->env_pgdir, start, 0);
		// Only remove mappings that are actually present.
		if (pte && *pte & PTE_P)
			page_remove(e->env_pgdir,start);
// Set up the initial program binary, stack, and processor flags
// for a user process.
//
// This function loads all loadable segments from the ELF binary image
// into the environment's user memory, starting at the appropriate
// virtual addresses indicated in the ELF program header.
// At the same time it clears to zero any portions of these segments
// that are marked in the program header as being mapped
// but not actually present in the ELF file - i.e., the program's bss section.
//
// Finally, this function maps one page for the program's initial stack.
load_icode(env_t *SAFE e, uint8_t *COUNT(size) binary, size_t size)
	// asw: copy the headers because they might not be aligned.
	memcpy(&elfhdr, binary, sizeof(elfhdr));

	// Sanity-check the ELF: right magic and at least one program header.
	assert(elfhdr.e_magic == ELF_MAGIC);
	// make sure we have proghdrs to load
	assert(elfhdr.e_phnum);

	// to actually access any pages alloc'd for this environment, we
	// need to have the hardware use this environment's page tables.
	uintreg_t old_cr3 = rcr3();
	/*
	 * Even though we'll decref later and no one should be killing us at this
	 * stage, we're still going to wrap the lcr3s with incref/decref.
	 *
	 * Note we never decref on the old_cr3, since we aren't willing to let it
	 * die.  It's also not clear who the previous process is - sometimes it
	 * isn't even a process (when the kernel loads on its own, and not in
	 * response to a syscall).  Probably need to think more about this (TODO)
	 *
	 * This can get a bit tricky if this code blocks (will need to think about a
	 * decref then), if we try to change states, etc.
	 */

	// TODO: how do we do a runtime COUNT?
	{TRUSTEDBLOCK // zra: TRUSTEDBLOCK until validation is done.
	for (i = 0; i < elfhdr.e_phnum; i++) {
		// Copy each proghdr out of the (possibly unaligned) image.
		memcpy(&phdr, binary + elfhdr.e_phoff + i*sizeof(phdr), sizeof(phdr));
		if (phdr.p_type != ELF_PROG_LOAD)
		// TODO: validate elf header fields!
		// seg alloc creates PTE_U|PTE_W pages.  if you ever want to change
		// this, there will be issues with overlapping sections
		// Track the highest end-of-segment address seen so far.
		_end = MAX(_end, (void*)(phdr.p_va + phdr.p_memsz));
		env_segment_alloc(e, (void*SNT)phdr.p_va, phdr.p_memsz);
		// Copy the file-backed bytes, then zero the bss tail
		// (p_memsz - p_filesz bytes beyond the file contents).
		memcpy((void*)phdr.p_va, binary + phdr.p_offset, phdr.p_filesz);
		memset((void*)phdr.p_va + phdr.p_filesz, 0,
		       phdr.p_memsz - phdr.p_filesz);

	// Start the process at the ELF entry point.
	proc_set_program_counter(&e->env_tf, elfhdr.e_entry);
	e->env_entry = elfhdr.e_entry;

	// Now map USTACK_NUM_PAGES pages for the program's initial stack
	// starting at virtual address USTACKTOP - USTACK_NUM_PAGES*PGSIZE.
	env_segment_alloc(e, (void*SNT)(USTACKTOP - USTACK_NUM_PAGES*PGSIZE),
	                  USTACK_NUM_PAGES*PGSIZE);

	// reload the original address space
// Allocates a new env and loads the named elf binary into it.
env_t* env_create(uint8_t *binary, size_t size)
	// Parent is the current env, or 0 when the kernel itself creates one.
	curid = (current ? current->env_id : 0);
	if ((r = env_alloc(&e, curid)) < 0)
		panic("env_create: %e", r);

	/* Load the binary and set the current locations of the elf segments.
	 * All end-of-segment pointers are page aligned (invariant) */
	e->end_text_segment = load_icode(e, binary, size);
	e->end_data_segment = e->end_text_segment;
// Frees env e and all memory it uses.
// Caller must have dropped all references first (asserted below).

	// Note the environment's demise.
	printk("[%08x] free env %08x\n", current ? current->env_id : 0, e->env_id);
	// All parts of the kernel should have decref'd before env_free was called.
	assert(e->env_refcnt == 0);

	// Flush all mapped pages in the user portion of the address space
	env_user_mem_free(e);

	// free the page directory
	page_decref(pa2page(pa));

	//Free any memory allocated by this process
	kfree(e->cache_colors_map);

	// return the environment to the free list
	spin_lock(&freelist_lock);
	TAILQ_INSERT_HEAD(&proc_freelist, e, proc_link);
	spin_unlock(&freelist_lock);
// Declares a per-cpu array 'name' of 'type' (one slot per core) plus an
// accessor get_per_cpu_<name>() that returns the current core's slot.
// SLOCKED/RWPROTECT/R_PERMITTED are Ivy sharing annotations tying the data
// to a lock named <name>_lock.
#define PER_CPU_THING(type,name)\
type SLOCKED(name##_lock) * RWPROTECT name;\
type SLOCKED(name##_lock) *\
(get_per_cpu_##name)()\
{ R_PERMITTED(global(name))\
	return &name[core_id()];\
/* This is the top-half of an interrupt handler, where the bottom half is
 * proc_run (which never returns).  Just add it to the delayed work queue,
 * which (incidentally) can only hold one item at this point.
 *
 * Note this is rather old, and meant to run a RUNNABLE_S on a worker core.
 */
// NOTE(review): the two prototypes below are presumably selected by an
// Ivy/non-Ivy #ifdef -- confirm against the full file.
void run_env_handler(trapframe_t *tf, env_t * data)
void run_env_handler(trapframe_t *tf, void * data)
	struct work TP(env_t *) job;
	// Grab this core's work queue; TC/TP/CT are Ivy trust/type annotations.
	struct workqueue TP(env_t *) *CT(1) workqueue =
	    TC(&per_cpu_info[core_id()].workqueue);
	// this doesn't work, and making it a TP(env_t) is wrong
	// zra: When you want to use other types, let me know, and I can help
	// make something that Ivy is happy with.

	// The queued job will invoke proc_run on the target env.
	job.func = (func_t)proc_run;

	if (enqueue_work(workqueue, &job))
		panic("Failed to enqueue work!");