/* See COPYRIGHT for copyright information. */
-#ifdef __SHARC__
-#pragma nosharc
-#endif
-
//#define DEBUG
#include <ros/common.h>
+#include <ros/limits.h>
#include <arch/types.h>
#include <arch/arch.h>
#include <arch/mmu.h>
#include <trap.h>
#include <syscall.h>
#include <kmalloc.h>
+#include <profiler.h>
#include <stdio.h>
-#include <frontend.h>
-#include <colored_caches.h>
#include <hashtable.h>
#include <bitmask.h>
#include <vfs.h>
#include <smp.h>
#include <arsc_server.h>
#include <event.h>
+#include <kprof.h>
#include <termios.h>
#include <manager.h>
+#include <ros/procinfo.h>
-/* Tracing Globals */
-int systrace_flags = 0;
-struct systrace_record *systrace_buffer = 0;
-uint32_t systrace_bufidx = 0;
-size_t systrace_bufsize = 0;
-struct proc *systrace_procs[MAX_NUM_TRACED] = {0};
-spinlock_t systrace_lock = SPINLOCK_INITIALIZER_IRQSAVE;
+static int execargs_stringer(struct proc *p, char *d, size_t slen,
+ char *path, size_t path_l,
+ char *argenv, size_t argenv_l);
-// for now, only want this visible here.
-void kprof_write_sysrecord(char *pretty_buf, size_t len);
+/* Global, used by the kernel monitor for syscall debugging. */
+bool systrace_loud = FALSE;
-/* Not enforcing the packing of systrace_procs yet, but don't rely on that */
-static bool proc_is_traced(struct proc *p)
+/* Helper, given the trace record, pretty-print the trace's contents into the
+ * trace's pretty buf. 'entry' says whether we're an entry record or not
+ * (exit). Returns the number of bytes put into the pretty_buf. */
+static size_t systrace_fill_pretty_buf(struct systrace_record *trace,
+ bool entry)
{
- for (int i = 0; i < MAX_NUM_TRACED; i++)
- if (systrace_procs[i] == p)
- return true;
- return false;
+ size_t len = 0;
+ struct timespec ts_start = tsc2timespec(trace->start_timestamp);
+ struct timespec ts_end = tsc2timespec(trace->end_timestamp);
+
+ /* Slightly different formats between entry and exit. Entry has retval set
+ * to ---, and begins with E. Exit begins with X. */
+ if (entry) {
+ len = snprintf(trace->pretty_buf, SYSTR_PRETTY_BUF_SZ - len,
+ "E [%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
+ "0x%llx, 0x%llx, 0x%llx, 0x%llx) ret: --- proc: %d core: %d "
+ "vcore: %d data: ",
+ ts_start.tv_sec,
+ ts_start.tv_nsec,
+ ts_end.tv_sec,
+ ts_end.tv_nsec,
+ trace->syscallno,
+ syscall_table[trace->syscallno].name,
+ trace->arg0,
+ trace->arg1,
+ trace->arg2,
+ trace->arg3,
+ trace->arg4,
+ trace->arg5,
+ trace->pid,
+ trace->coreid,
+ trace->vcoreid);
+ } else {
+ len = snprintf(trace->pretty_buf, SYSTR_PRETTY_BUF_SZ - len,
+ "X [%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
+ "0x%llx, 0x%llx, 0x%llx, 0x%llx) ret: 0x%llx proc: %d core: %d "
+ "vcore: %d data: ",
+ ts_start.tv_sec,
+ ts_start.tv_nsec,
+ ts_end.tv_sec,
+ ts_end.tv_nsec,
+ trace->syscallno,
+ syscall_table[trace->syscallno].name,
+ trace->arg0,
+ trace->arg1,
+ trace->arg2,
+ trace->arg3,
+ trace->arg4,
+ trace->arg5,
+ trace->retval,
+ trace->pid,
+ trace->coreid,
+ trace->vcoreid);
+ }
+ /* NOTE(review): snprintf returns the would-be length; if the header ever
+ * grew past SYSTR_PRETTY_BUF_SZ, 'len' would exceed the buffer and the
+ * size math below would underflow. Assumed unreachable with the fixed
+ * format above -- confirm if the format changes. */
+ len += printdump(trace->pretty_buf + len, trace->datalen,
+ SYSTR_PRETTY_BUF_SZ - len - 1,
+ trace->data);
+ len += snprintf(trace->pretty_buf + len, SYSTR_PRETTY_BUF_SZ - len, "\n");
+ return len;
}
-static bool __trace_this_proc(struct proc *p)
+/* If some syscalls block, then they can really hurt the user and the
+ * kernel. For instance, if you blocked another call because the trace queue is
+ * full, the 2LS will want to yield the vcore, but then *that* call would block
+ * too. Since that caller was in vcore context, the core will just spin
+ * forever.
+ *
+ * Even worse, some syscalls operate on the calling core or current context,
+ * thus accessing pcpui. If we block, then that old context is gone. Worse, we
+ * could migrate and then be operating on a different core. Imagine
+ * SYS_halt_core. Doh! */
+static bool sysc_can_block(unsigned int sysc_num)
+{
+ switch (sysc_num) {
+ case SYS_proc_yield:
+ case SYS_fork:
+ case SYS_exec:
+ case SYS_pop_ctx:
+ case SYS_getvcoreid:
+ case SYS_halt_core:
+ case SYS_vc_entry:
+ case SYS_change_vcore:
+ case SYS_change_to_m:
+ return FALSE;
+ }
+ /* Everything not listed above is assumed safe to block on; keep the list
+ * in sync when adding vcore-context or core-local syscalls. */
+ return TRUE;
+}
+
+/* Helper: spits out our trace to the various sinks. */
+static void systrace_output(struct systrace_record *trace,
+ struct strace *strace, bool entry)
{
- return (systrace_flags & SYSTRACE_ON) &&
- ((systrace_flags & SYSTRACE_ALLPROC) || (proc_is_traced(p)));
+ ERRSTACK(1);
+ size_t pretty_len;
+
+ /* qio ops can throw, especially the blocking qwrite. I had it block on the
+ * outbound path of sys_proc_destroy(). The rendez immediately throws. */
+ if (waserror()) {
+ poperror();
+ return;
+ }
+ /* The pretty buf is built even when only the 'loud' printk sink is on. */
+ pretty_len = systrace_fill_pretty_buf(trace, entry);
+ if (strace) {
+ /* At this point, we're going to emit the exit trace. It's just a
+ * question of whether or not we block while doing it. */
+ if (strace->drop_overflow || !sysc_can_block(trace->syscallno))
+ qiwrite(strace->q, trace->pretty_buf, pretty_len);
+ else
+ qwrite(strace->q, trace->pretty_buf, pretty_len);
+ }
+ if (systrace_loud)
+ printk("%s", trace->pretty_buf);
+ poperror();
}
-static size_t systrace_fill_pretty_buf(struct systrace_record *trace)
+/* Returns TRUE if we should emit a trace record for 'sysc' from proc 'p'. */
+static bool should_strace(struct proc *p, struct syscall *sysc)
{
- size_t len = 0;
- struct timespec ts_start;
- struct timespec ts_end;
- tsc2timespec(trace->start_timestamp, &ts_start);
- tsc2timespec(trace->end_timestamp, &ts_end);
-
- len = snprintf(trace->pretty_buf, SYSTR_PRETTY_BUF_SZ - len,
- "[%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
- "0x%llx, 0x%llx, 0x%llx, 0x%llx) ret: 0x%llx proc: %d core: %d "
- "vcore: %d data: ",
- ts_start.tv_sec,
- ts_start.tv_nsec,
- ts_end.tv_sec,
- ts_end.tv_nsec,
- trace->syscallno,
- syscall_table[trace->syscallno].name,
- trace->arg0,
- trace->arg1,
- trace->arg2,
- trace->arg3,
- trace->arg4,
- trace->arg5,
- trace->retval,
- trace->pid,
- trace->coreid,
- trace->vcoreid);
- /* if we have extra data, print it out on the next line, lined up nicely.
- * this is only useful for looking at the dump in certain terminals. if we
- * have a tool that processes the info, we shouldn't do this. */
- if (trace->datalen)
- len += snprintf(trace->pretty_buf + len, SYSTR_PRETTY_BUF_SZ - len,
- "\n%67s", "");
- len += printdump(trace->pretty_buf + len,
- MIN(trace->datalen, SYSTR_PRETTY_BUF_SZ - len - 1),
- trace->data);
- len += snprintf(trace->pretty_buf + len, SYSTR_PRETTY_BUF_SZ - len, "\n");
- return len;
+ unsigned int sysc_num;
+
+ /* 'loud' mode traces everything, regardless of per-proc settings. */
+ if (systrace_loud)
+ return TRUE;
+ if (!p->strace || !p->strace->tracing)
+ return FALSE;
+ /* TOCTTOU concerns - sysc is __user. */
+ sysc_num = ACCESS_ONCE(sysc->num);
+ if (qfull(p->strace->q)) {
+ /* Only drop when we can't (or won't) block for queue space. */
+ if (p->strace->drop_overflow || !sysc_can_block(sysc_num)) {
+ atomic_inc(&p->strace->nr_drops);
+ return FALSE;
+ }
+ }
+ if (sysc_num > MAX_SYSCALL_NR)
+ return FALSE;
+ /* Per-syscall filter bitmap, set via the strace interface. */
+ return test_bit(sysc_num, p->strace->trace_set);
+}
+
+/* Helper, copies len bytes from u_data to the trace->data, if there's room. */
+static void copy_tracedata_from_user(struct systrace_record *trace,
+ long u_data, size_t len)
+{
+ size_t copy_amt;
+
+ /* Silently truncates to whatever room remains in trace->data. */
+ copy_amt = MIN(sizeof(trace->data) - trace->datalen, len);
+ /* NOTE(review): copy_from_user's return value is ignored; on a faulting
+ * user pointer the copied region may be garbage. Presumably acceptable
+ * for debug output -- confirm. */
+ copy_from_user(trace->data + trace->datalen, (void*)u_data, copy_amt);
+ trace->datalen += copy_amt;
+}
+
+/* Helper, snprintfs to the trace, if there's room. */
+static void snprintf_to_trace(struct systrace_record *trace, const char *fmt,
+ ...)
+{
+ va_list ap;
+ int rc;
+
+ va_start(ap, fmt);
+ rc = vsnprintf((char*)trace->data + trace->datalen,
+ sizeof(trace->data) - trace->datalen, fmt, ap);
+ va_end(ap);
+ /* Only advance datalen on a clean, untruncated print. */
+ if (!snprintf_error(rc, sizeof(trace->data) - trace->datalen))
+ trace->datalen += rc;
}
+/* Starts a trace for p running sysc, attaching it to kthread. Pairs with
+ * systrace_finish_trace(). Allocates and fills the entry record, emits it to
+ * the sinks, and stashes it on the kthread for the exit half. */
static void systrace_start_trace(struct kthread *kthread, struct syscall *sysc)
{
- struct systrace_record *trace;
- int coreid, vcoreid;
struct proc *p = current;
+ struct systrace_record *trace;
- if (!__trace_this_proc(p))
- return;
- assert(!kthread->trace); /* catch memory leaks */
- coreid = core_id();
- vcoreid = proc_get_vcoreid(p);
- if (systrace_flags & SYSTRACE_LOUD) {
- printk("ENTER [%16llu] Syscall %3d (%12s):(0x%llx, 0x%llx, 0x%llx, "
- "0x%llx, 0x%llx, 0x%llx) proc: %d core: %d vcore: %d\n",
- read_tsc(),
- sysc->num, syscall_table[sysc->num].name,
- sysc->arg0, sysc->arg1, sysc->arg2, sysc->arg3, sysc->arg4,
- sysc->arg5, p->pid, coreid, vcoreid);
- }
- trace = kmalloc(SYSTR_BUF_SZ, 0);
- if (!trace)
+ kthread->strace = 0;
+ if (!should_strace(p, sysc))
return;
- kthread->trace = trace;
+ /* TODO: consider a block_alloc and qpass, though note that we actually
+ * write the same trace in twice (entry and exit). */
+ trace = kpages_alloc(SYSTR_BUF_SZ, MEM_ATOMIC);
+ if (p->strace) {
+ if (!trace) {
+ atomic_inc(&p->strace->nr_drops);
+ return;
+ }
+ /* Avoiding the atomic op. We sacrifice accuracy for less overhead. */
+ p->strace->appx_nr_sysc++;
+ } else {
+ if (!trace)
+ return;
+ }
+ /* if you ever need to debug just one strace function, this is
+ * handy way to do it: just bail out if it's not the one you
+ * want.
+ * if (sysc->num != SYS_exec)
+ * return; */
trace->start_timestamp = read_tsc();
+ trace->end_timestamp = 0;
trace->syscallno = sysc->num;
trace->arg0 = sysc->arg0;
trace->arg1 = sysc->arg1;
+ trace->arg2 = sysc->arg2; /* FIX(review): was never assigned; the record
+ * comes from kpages_alloc, so arg2 would
+ * otherwise print uninitialized memory. */
trace->arg3 = sysc->arg3;
trace->arg4 = sysc->arg4;
trace->arg5 = sysc->arg5;
+ trace->retval = 0;
trace->pid = p->pid;
- trace->coreid = coreid;
- trace->vcoreid = vcoreid;
+ trace->coreid = core_id();
+ trace->vcoreid = proc_get_vcoreid(p);
trace->pretty_buf = (char*)trace + sizeof(struct systrace_record);
trace->datalen = 0;
trace->data[0] = 0;
-}
+ /* For calls that carry user buffers/paths, snapshot them into the trace. */
+ switch (sysc->num) {
+ case SYS_write:
+ copy_tracedata_from_user(trace, sysc->arg1, sysc->arg2);
+ break;
+ case SYS_openat:
+ case SYS_chdir:
+ case SYS_rmdir:
+ case SYS_nmount:
+ copy_tracedata_from_user(trace, sysc->arg1, sysc->arg2);
+ break;
+ case SYS_stat:
+ case SYS_lstat:
+ case SYS_access:
+ case SYS_unlink:
+ case SYS_mkdir:
+ case SYS_wstat:
+ copy_tracedata_from_user(trace, sysc->arg0, sysc->arg1);
+ break;
+ case SYS_link:
+ case SYS_symlink:
+ case SYS_rename:
+ case SYS_nbind:
+ copy_tracedata_from_user(trace, sysc->arg0, sysc->arg1);
+ snprintf_to_trace(trace, " -> ");
+ copy_tracedata_from_user(trace, sysc->arg2, sysc->arg3);
+ break;
+ case SYS_nunmount:
+ copy_tracedata_from_user(trace, sysc->arg2, sysc->arg3);
+ break;
+ case SYS_exec:
+ case SYS_proc_create:
+ /* Both take (path, path_l, argenv, argenv_l); summarize identically
+ * (deduplicated from two formerly byte-identical cases). */
+ trace->datalen = execargs_stringer(current,
+ (char *)trace->data,
+ sizeof(trace->data),
+ (char *)sysc->arg0,
+ sysc->arg1,
+ (char *)sysc->arg2,
+ sysc->arg3);
+ break;
+ }
+ /* Emit the entry record now; the exit half re-emits with retval/data. */
+ systrace_output(trace, p->strace, TRUE);
+
+ kthread->strace = trace;
+}
+
+/* Finishes the trace on kthread for p, with retval being the return from the
+ * syscall we're tracing. Pairs with systrace_start_trace(). */
static void systrace_finish_trace(struct kthread *kthread, long retval)
{
- struct systrace_record *trace = kthread->trace;
- size_t pretty_len;
- if (trace) {
- trace->end_timestamp = read_tsc();
- trace->retval = retval;
- kthread->trace = 0;
- pretty_len = systrace_fill_pretty_buf(trace);
- kprof_write_sysrecord(trace->pretty_buf, pretty_len);
- if (systrace_flags & SYSTRACE_LOUD)
- printk("EXIT %s", trace->pretty_buf);
- kfree(trace);
+ struct proc *p = current;
+ struct systrace_record *trace;
+
+ /* No record was attached at entry (not traced, or the alloc failed). */
+ if (!kthread->strace)
+ return;
+ trace = kthread->strace;
+ trace->end_timestamp = read_tsc();
+ trace->retval = retval;
+
+ /* Only try to do the trace data if we didn't do it on entry */
+ if (!trace->datalen) {
+ switch (trace->syscallno) {
+ case SYS_read:
+ /* retval is the number of bytes read; skip on error/zero. */
+ if (retval <= 0)
+ break;
+ copy_tracedata_from_user(trace, trace->arg1, retval);
+ break;
+ case SYS_readlink:
+ if (retval <= 0)
+ break;
+ copy_tracedata_from_user(trace, trace->arg0, trace->arg1);
+ snprintf_to_trace(trace, " -> ");
+ copy_tracedata_from_user(trace, trace->arg2, trace->arg3);
+ break;
+ }
}
+
+ systrace_output(trace, p->strace, FALSE);
+ kpages_free(kthread->strace, SYSTR_BUF_SZ);
+ kthread->strace = 0;
}
#ifdef CONFIG_SYSCALL_STRING_SAVING
+/* Allocates the per-kthread buffer used to stash a printable syscall string;
+ * allocation failure is tolerated (sysc_save_str checks for NULL). */
static void alloc_sysc_str(struct kthread *kth)
{
- kth->name = kmalloc(SYSCALL_STRLEN, KMALLOC_WAIT);
+ kth->name = kmalloc(SYSCALL_STRLEN, MEM_ATOMIC);
+ if (!kth->name)
+ return;
kth->name[0] = 0;
}
+/* Frees the kthread's syscall string, clearing the pointer first. */
static void free_sysc_str(struct kthread *kth)
{
char *str = kth->name;
+
kth->name = 0;
kfree(str);
}
#define sysc_save_str(...) \
{ \
struct per_cpu_info *pcpui = &per_cpu_info[core_id()]; \
- snprintf(pcpui->cur_kthread->name, SYSCALL_STRLEN, __VA_ARGS__); \
+ \
+ if (pcpui->cur_kthread->name) \
+ snprintf(pcpui->cur_kthread->name, SYSCALL_STRLEN, __VA_ARGS__); \
}
#else
* to not muck with the flags while we're signalling. */
atomic_or(&sysc->flags, SC_K_LOCK | SC_DONE);
__signal_syscall(sysc, p);
- atomic_and(&sysc->flags, ~SC_K_LOCK);
+ atomic_and(&sysc->flags, ~SC_K_LOCK);
}
/* Helper that "finishes" the current async syscall. This should be used with
pcpui->cur_kthread->sysc->errstr[0] = '\0';
}
-void set_errstr(const char *fmt, ...)
+/* va_list core of set_errstr(): formats into the current kthread's syscall
+ * errstr. Quietly a no-op if there's no current kthread or syscall. */
+void vset_errstr(const char *fmt, va_list ap)
{
- va_list ap;
- int rc;
-
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+
if (!pcpui->cur_kthread || !pcpui->cur_kthread->sysc)
return;
- va_start(ap, fmt);
- rc = vsnprintf(pcpui->cur_kthread->sysc->errstr, MAX_ERRSTR_LEN, fmt, ap);
- va_end(ap);
+ vsnprintf(pcpui->cur_kthread->sysc->errstr, MAX_ERRSTR_LEN, fmt, ap);
/* TODO: likely not needed */
pcpui->cur_kthread->sysc->errstr[MAX_ERRSTR_LEN - 1] = '\0';
}
+/* Sets the errstr for the current kthread's syscall (printf-style). */
+void set_errstr(const char *fmt, ...)
+{
+ va_list ap;
+
+ assert(fmt);
+ va_start(ap, fmt);
+ vset_errstr(fmt, ap);
+ va_end(ap);
+}
+
+/* Returns the current syscall's errstr. Unlike vset_errstr(), this assumes a
+ * kthread/syscall is in flight (no NULL checks) -- TODO(review): confirm all
+ * callers run in syscall context. */
char *current_errstr(void)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
return pcpui->cur_kthread->sysc->errstr;
}
+/* Sets both errno and errstr (printf-style) for the current syscall. */
+void set_error(int error, const char *fmt, ...)
+{
+ va_list ap;
+
+ set_errno(error);
+
+ assert(fmt);
+ va_start(ap, fmt);
+ vset_errstr(fmt, ap);
+ va_end(ap);
+}
+
+/* Returns the current kthread's error buffer (for waserror/poperror). */
struct errbuf *get_cur_errbuf(void)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
- return (struct errbuf*)pcpui->cur_kthread->errbuf;
+ return pcpui->cur_kthread->errbuf;
}
void set_cur_errbuf(struct errbuf *ebuf)
return target;
}
+/* Unpacks a kernel copy of a user-built 'struct argenv' blob of total size
+ * argenv_l into argc/argv and envc/envp. Layout of argenv->buf: argc argv
+ * offsets, then envc envp offsets, then the packed string data. Offsets are
+ * converted to kernel pointers in place. Returns 0 on success, -1 if any
+ * table or offset falls outside the blob.
+ * NOTE(review): argc/envc come from userspace as signed ints and are not
+ * checked for negative values here -- TODO confirm callers/checks upstream. */
+static int unpack_argenv(struct argenv *argenv, size_t argenv_l,
+ int *argc_p, char ***argv_p,
+ int *envc_p, char ***envp_p)
+{
+ int argc = argenv->argc;
+ int envc = argenv->envc;
+ char **argv = (char**)argenv->buf;
+ char **envp = argv + argc;
+ char *argbuf = (char*)(envp + envc);
+ uintptr_t argbuf_offset = (uintptr_t)(argbuf - (char*)(argenv));
+
+ /* Bounds-check each table against the end of the blob. */
+ if (((char*)argv - (char*)argenv) > argenv_l)
+ return -1;
+ if (((char*)argv + (argc * sizeof(char**)) - (char*)argenv) > argenv_l)
+ return -1;
+ if (((char*)envp - (char*)argenv) > argenv_l)
+ return -1;
+ if (((char*)envp + (envc * sizeof(char**)) - (char*)argenv) > argenv_l)
+ return -1;
+ if (((char*)argbuf - (char*)argenv) > argenv_l)
+ return -1;
+ for (int i = 0; i < argc; i++) {
+ /* argv[i] holds an offset from argbuf; validate it lands in the blob
+ * before converting it to a kernel pointer. */
+ if ((uintptr_t)(argv[i] + argbuf_offset) > argenv_l)
+ return -1;
+ argv[i] += (uintptr_t)argbuf;
+ }
+ for (int i = 0; i < envc; i++) {
+ if ((uintptr_t)(envp[i] + argbuf_offset) > argenv_l)
+ return -1;
+ envp[i] += (uintptr_t)argbuf;
+ }
+ *argc_p = argc;
+ *argv_p = argv;
+ *envc_p = envc;
+ *envp_p = envp;
+ return 0;
+}
+
/************** Utility Syscalls **************/
static int sys_null(void)
/* Diagnostic function: blocks the kthread/syscall, to help userspace test its
* async I/O handling. */
-static int sys_block(struct proc *p, unsigned int usec)
-{
- struct timer_chain *tchain = &per_cpu_info[core_id()].tchain;
- struct alarm_waiter a_waiter;
- init_awaiter(&a_waiter, 0);
- /* Note printing takes a few ms, so your printds won't be perfect. */
- printd("[kernel] sys_block(), sleeping at %llu\n", read_tsc());
- set_awaiter_rel(&a_waiter, usec);
- set_alarm(tchain, &a_waiter);
- sleep_on_awaiter(&a_waiter);
- printd("[kernel] sys_block(), waking up at %llu\n", read_tsc());
+static int sys_block(struct proc *p, unsigned long usec)
+{
+ /* Record what we're blocked on for debugging; a no-op if the kthread's
+ * name buf wasn't allocated (see sysc_save_str). */
+ sysc_save_str("block for %lu usec", usec);
+ kthread_usleep(usec);
return 0;
}
-// Writes 'val' to 'num_writes' entries of the well-known array in the kernel
-// address space. It's just #defined to be some random 4MB chunk (which ought
-// to be boot_alloced or something). Meant to grab exclusive access to cache
-// lines, to simulate doing something useful.
-static int sys_cache_buster(struct proc *p, uint32_t num_writes,
- uint32_t num_pages, uint32_t flags)
-{
- #define BUSTER_ADDR 0xd0000000L // around 512 MB deep
- #define MAX_WRITES 1048576*8
- #define MAX_PAGES 32
- #define INSERT_ADDR (UINFO + 2*PGSIZE) // should be free for these tests
- uint32_t* buster = (uint32_t*)BUSTER_ADDR;
- static spinlock_t buster_lock = SPINLOCK_INITIALIZER;
- uint64_t ticks = -1;
- page_t* a_page[MAX_PAGES];
-
- /* Strided Accesses or Not (adjust to step by cachelines) */
- uint32_t stride = 1;
- if (flags & BUSTER_STRIDED) {
- stride = 16;
- num_writes *= 16;
- }
-
- /* Shared Accesses or Not (adjust to use per-core regions)
- * Careful, since this gives 8MB to each core, starting around 512MB.
- * Also, doesn't separate memory for core 0 if it's an async call.
- */
- if (!(flags & BUSTER_SHARED))
- buster = (uint32_t*)(BUSTER_ADDR + core_id() * 0x00800000);
-
- /* Start the timer, if we're asked to print this info*/
- if (flags & BUSTER_PRINT_TICKS)
- ticks = start_timing();
+/* Pause execution for a number of nanoseconds.
+ * The current implementation rounds up to the nearest microsecond. If the
+ * syscall is aborted, we return the remaining time the call would have run
+ * in the 'rem' parameter. */
+static int sys_nanosleep(struct proc *p,
+ const struct timespec *req,
+ struct timespec *rem)
+{
+ ERRSTACK(1);
+ uint64_t usec;
+ struct timespec kreq, krem = {0, 0};
+ uint64_t tsc = read_tsc();
- /* Allocate num_pages (up to MAX_PAGES), to simulate doing some more
- * realistic work. Note we don't write to these pages, even if we pick
- * unshared. Mostly due to the inconvenience of having to match up the
- * number of pages with the number of writes. And it's unnecessary.
- */
- if (num_pages) {
- spin_lock(&buster_lock);
- for (int i = 0; i < MIN(num_pages, MAX_PAGES); i++) {
- upage_alloc(p, &a_page[i],1);
- page_insert(p->env_pgdir, a_page[i], (void*)INSERT_ADDR + PGSIZE*i,
- PTE_USER_RW);
- page_decref(a_page[i]);
- }
- spin_unlock(&buster_lock);
+ /* Check the input arguments. */
+ if (memcpy_from_user(p, &kreq, req, sizeof(struct timespec))) {
+ set_errno(EFAULT);
+ return -1;
}
-
- if (flags & BUSTER_LOCKED)
- spin_lock(&buster_lock);
- for (int i = 0; i < MIN(num_writes, MAX_WRITES); i=i+stride)
- buster[i] = 0xdeadbeef;
- if (flags & BUSTER_LOCKED)
- spin_unlock(&buster_lock);
-
- if (num_pages) {
- spin_lock(&buster_lock);
- for (int i = 0; i < MIN(num_pages, MAX_PAGES); i++) {
- page_remove(p->env_pgdir, (void*)(INSERT_ADDR + PGSIZE * i));
- page_decref(a_page[i]);
- }
- spin_unlock(&buster_lock);
+ /* Probe 'rem' for writability up front (writing {0,0}) so we can fail
+ * with EFAULT before sleeping rather than after. */
+ if (rem && memcpy_to_user(p, rem, &krem, sizeof(struct timespec))) {
+ set_errno(EFAULT);
+ return -1;
+ }
+ if (kreq.tv_sec < 0) {
+ set_errno(EINVAL);
+ return -1;
+ }
+ if ((kreq.tv_nsec < 0) || (kreq.tv_nsec > 999999999)) {
+ set_errno(EINVAL);
+ return -1;
}
- /* Print info */
- if (flags & BUSTER_PRINT_TICKS) {
- ticks = stop_timing(ticks);
- printk("%llu,", ticks);
+ /* Convert timespec to usec. Ignore overflow on the tv_sec field. */
+ usec = kreq.tv_sec * 1000000;
+ usec += DIV_ROUND_UP(kreq.tv_nsec, 1000);
+
+ /* Attempt to sleep. If we get aborted, copy the remaining time into
+ * 'rem' and return. We assume the tsc is sufficient to tell how much
+ * time is remaining (i.e. it only overflows on the order of hundreds of
+ * years, which should be long enough to ensure we don't
+ * overflow). */
+ if (waserror()) {
+ krem = tsc2timespec(read_tsc() - tsc);
+ if (rem && memcpy_to_user(p, rem, &krem, sizeof(struct timespec)))
+ set_errno(EFAULT);
+ poperror();
+ return -1;
}
+ /* FIX(review): usec is uint64_t; '%d' was a format mismatch. %llu matches
+ * the file's convention for 64-bit values. */
+ sysc_save_str("nanosleep for %llu usec", usec);
+ kthread_usleep(usec);
+ poperror();
return 0;
}
/* sys_reboot(): called directly from dispatch table. */
-/* Print a string to the system console. */
-static ssize_t sys_cputs(struct proc *p, const char *DANGEROUS string,
- size_t strlen)
-{
- char *t_string;
- t_string = user_strdup_errno(p, string, strlen);
- if (!t_string)
- return -1;
- printk("%.*s", strlen, t_string);
- user_memdup_free(p, t_string);
- return (ssize_t)strlen;
-}
-
-// Read a character from the system console.
-// Returns the character.
-/* TODO: remove me */
-static uint16_t sys_cgetc(struct proc *p)
-{
- uint16_t c;
-
- // The cons_get_any_char() primitive doesn't wait for a character,
- // but the sys_cgetc() system call does.
- while ((c = cons_get_any_char()) == 0)
- cpu_relax();
-
- return c;
-}
-
/* Returns the id of the physical core this syscall is executed on. */
static uint32_t sys_getpcoreid(void)
{
/************** Process management syscalls **************/
-/* Returns the calling process's pid */
-static pid_t sys_getpid(struct proc *p)
+/* Helper for proc_create and fork: if the parent is being straced with
+ * 'inherit' set, the child joins the same strace session (taking a ref on
+ * both the session's 'users' and 'procs' kref counts). */
+static void inherit_strace(struct proc *parent, struct proc *child)
{
- return p->pid;
+ if (parent->strace && parent->strace->inherit) {
+ /* Refcnt on both, put in the child's ->strace. */
+ kref_get(&parent->strace->users, 1);
+ kref_get(&parent->strace->procs, 1);
+ child->strace = parent->strace;
+ }
}
/* Creates a process from the file 'path'. The process is not runnable by
* default, so it needs its status to be changed so that the next call to
- * schedule() will try to run it. TODO: take args/envs from userspace. */
+ * schedule() will try to run it. */
static int sys_proc_create(struct proc *p, char *path, size_t path_l,
- struct procinfo *pi, int flags)
+ char *argenv, size_t argenv_l, int flags)
{
int pid = 0;
char *t_path;
struct file *program;
struct proc *new_p;
+ int argc, envc;
+ char **argv, **envp;
+ struct argenv *kargenv;
- /* Copy in the path. Consider putting an upper bound on path_l. */
- t_path = user_strdup_errno(p, path, path_l);
+ t_path = copy_in_path(p, path, path_l);
if (!t_path)
return -1;
/* TODO: 9ns support */
- program = do_file_open(t_path, 0, 0);
- user_memdup_free(p, t_path);
+ program = do_file_open(t_path, O_READ, 0);
if (!program)
- return -1; /* presumably, errno is already set */
+ goto error_with_path;
+ if (!is_valid_elf(program)) {
+ set_errno(ENOEXEC);
+ goto error_with_file;
+ }
+ /* Check the size of the argenv array, error out if too large. */
+ if ((argenv_l < sizeof(struct argenv)) || (argenv_l > ARG_MAX)) {
+ set_error(EINVAL, "The argenv array has an invalid size: %lu\n",
+ argenv_l);
+ goto error_with_file;
+ }
+ /* Copy the argenv array into a kernel buffer. Delay processing of the
+ * array to load_elf(). */
+ kargenv = user_memdup_errno(p, argenv, argenv_l);
+ if (!kargenv) {
+ set_error(EINVAL, "Failed to copy in the args");
+ goto error_with_file;
+ }
+ /* Unpack the argenv array into more usable variables. Integrity checking
+ * done along side this as well. */
+ if (unpack_argenv(kargenv, argenv_l, &argc, &argv, &envc, &envp)) {
+ set_error(EINVAL, "Failed to unpack the args");
+ goto error_with_kargenv;
+ }
/* TODO: need to split the proc creation, since you must load after setting
* args/env, since auxp gets set up there. */
//new_p = proc_create(program, 0, 0);
if (proc_alloc(&new_p, current, flags)) {
- set_errstr("Failed to alloc new proc");
- goto mid_error;
+ set_error(ENOMEM, "Failed to alloc new proc");
+ goto error_with_kargenv;
}
+ inherit_strace(p, new_p);
/* close the CLOEXEC ones, even though this isn't really an exec */
- close_9ns_files(new_p, TRUE);
- close_all_files(&new_p->open_files, TRUE);
- /* Set the argument stuff needed by glibc */
- if (memcpy_from_user_errno(p, new_p->procinfo->argp, pi->argp,
- sizeof(pi->argp))) {
- set_errstr("Failed to memcpy argp");
- goto late_error;
- }
- if (memcpy_from_user_errno(p, new_p->procinfo->argbuf, pi->argbuf,
- sizeof(pi->argbuf))) {
- set_errstr("Failed to memcpy argbuf");
- goto late_error;
- }
- if (load_elf(new_p, program)) {
- set_errstr("Failed to load elf");
- goto late_error;
+ close_fdt(&new_p->open_files, TRUE);
+ /* Load the elf. */
+ if (load_elf(new_p, program, argc, argv, envc, envp)) {
+ set_error(EINVAL, "Failed to load elf");
+ goto error_with_proc;
}
/* progname is argv0, which accounts for symlinks */
- proc_set_progname(p, p->procinfo->argbuf);
+ proc_set_progname(new_p, argc ? argv[0] : NULL);
+ /* NOTE(review): on success t_path is handed to proc_replace_binary_path()
+ * and is not freed here; error paths free it via free_path(). Confirm the
+ * callee takes ownership of the string. */
+ proc_replace_binary_path(new_p, t_path);
kref_put(&program->f_kref);
+ user_memdup_free(p, kargenv);
__proc_ready(new_p);
pid = new_p->pid;
+ profiler_notify_new_process(new_p);
proc_decref(new_p); /* give up the reference created in proc_create() */
return pid;
-late_error:
- set_errno(EINVAL);
+error_with_proc:
/* proc_destroy will decref once, which is for the ref created in
* proc_create(). We don't decref again (the usual "+1 for existing"),
* since the scheduler, which usually handles that, hasn't heard about the
* process (via __proc_ready()). */
proc_destroy(new_p);
-mid_error:
+error_with_kargenv:
+ user_memdup_free(p, kargenv);
+error_with_file:
kref_put(&program->f_kref);
+error_with_path:
+ free_path(p, t_path);
return -1;
}
printd("[%d] destroying proc %d\n", p->pid, p_to_die->pid);
}
proc_destroy(p_to_die);
- /* we only get here if we weren't the one to die */
proc_decref(p_to_die);
return 0;
}
static ssize_t sys_fork(env_t* e)
{
- struct proc *temp;
- int8_t state = 0;
+ uintptr_t temp;
int ret;
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
// TODO: right now we only support fork for single-core processes
if (e->state != PROC_RUNNING_S) {
assert(env != NULL);
proc_set_progname(env, e->progname);
- env->heap_top = e->heap_top;
- env->ppid = e->pid;
- disable_irqsave(&state); /* protect cur_ctx */
/* Can't really fork if we don't have a current_ctx to fork */
if (!current_ctx) {
proc_destroy(env);
set_errno(EINVAL);
return -1;
}
- env->scp_ctx = *current_ctx;
- enable_irqsave(&state);
-
- env->cache_colors_map = cache_colors_map_alloc();
- for(int i=0; i < llc_cache->num_colors; i++)
- if(GET_BITMASK_BIT(e->cache_colors_map,i))
- cache_color_alloc(llc_cache, env->cache_colors_map);
+ assert(pcpui->cur_proc == pcpui->owning_proc);
+ copy_current_ctx_to(&env->scp_ctx);
/* Make the new process have the same VMRs as the older. This will copy the
* contents of non MAP_SHARED pages to the new VMRs. */
finish_current_sysc(0);
switch_back(env, temp);
+ /* Copy some state from the original proc into the new proc. */
+ env->env_flags = e->env_flags;
+
+ inherit_strace(e, env);
+
/* In general, a forked process should be a fresh process, and we copy over
* whatever stuff is needed between procinfo/procdata. */
- /* Copy over the procinfo argument stuff in case they don't exec */
- memcpy(env->procinfo->argp, e->procinfo->argp, sizeof(e->procinfo->argp));
- memcpy(env->procinfo->argbuf, e->procinfo->argbuf,
- sizeof(e->procinfo->argbuf));
- #ifdef CONFIG_X86
- /* new guy needs to know about ldt (everything else in procdata is fresh */
- env->procdata->ldt = e->procdata->ldt;
- #endif
+ *env->procdata = *e->procdata;
+ env->procinfo->program_end = e->procinfo->program_end;
/* FYI: once we call ready, the proc is open for concurrent usage */
__proc_ready(env);
printd("[PID %d] fork PID %d\n", e->pid, env->pid);
ret = env->pid;
+ profiler_notify_new_process(env);
proc_decref(env); /* give up the reference created in proc_alloc() */
return ret;
}
+/* string for sys_exec arguments. Assumes that d is pointing to zero'd
+ * storage or storage that does not require null termination or
+ * provides the null. */
+static int execargs_stringer(struct proc *p, char *d, size_t slen,
+ char *path, size_t path_l,
+ char *argenv, size_t argenv_l)
+{
+ int argc, envc, i;
+ char **argv, **envp;
+ struct argenv *kargenv;
+ int amt;
+ char *s = d;
+ char *e = d + slen;
+
+ if (path_l > slen)
+ path_l = slen;
+ if (memcpy_from_user(p, d, path, path_l)) {
+ s = seprintf(s, e, "Invalid exec path");
+ return s - d;
+ }
+ /* NOTE(review): the raw path bytes are not NUL-terminated here; per the
+ * header comment above, 'd' is assumed to be zeroed storage. */
+ s += path_l;
+
+ /* yes, this code is cloned from below. I wrote a helper but
+ * Barret and I concluded after talking about it that the
+ * helper was not really helper-ful, as it has almost 10
+ * arguments. Please, don't suggest a cpp macro. Thank you. */
+ /* Check the size of the argenv array, error out if too large. */
+ if ((argenv_l < sizeof(struct argenv)) || (argenv_l > ARG_MAX)) {
+ s = seprintf(s, e, "The argenv array has an invalid size: %lu\n",
+ argenv_l);
+ return s - d;
+ }
+ /* Copy the argenv array into a kernel buffer. */
+ kargenv = user_memdup_errno(p, argenv, argenv_l);
+ if (!kargenv) {
+ s = seprintf(s, e, "Failed to copy in the args and environment");
+ return s - d;
+ }
+ /* Unpack the argenv array into more usable variables. Integrity checking
+ * done along side this as well. */
+ if (unpack_argenv(kargenv, argenv_l, &argc, &argv, &envc, &envp)) {
+ s = seprintf(s, e, "Failed to unpack the args");
+ user_memdup_free(p, kargenv);
+ return s - d;
+ }
+ /* Only argv is printed; envp is unpacked purely for validation. */
+ s = seprintf(s, e, "[%d]{", argc);
+ for (i = 0; i < argc; i++)
+ s = seprintf(s, e, "%s, ", argv[i]);
+ s = seprintf(s, e, "}");
+
+ user_memdup_free(p, kargenv);
+ return s - d;
+}
+
/* Load the binary "path" into the current process, and start executing it.
* argv and envp are magically bundled in procinfo for now. Keep in sync with
* glibc's sysdeps/ros/execve.c. Once past a certain point, this function won't
* return. It assumes (and checks) that it is current. Don't give it an extra
- * refcnt'd *p (syscall won't do that).
+ * refcnt'd *p (syscall won't do that).
* Note: if someone batched syscalls with this call, they could clobber their
* old memory (and will likely PF and die). Don't do it... */
static int sys_exec(struct proc *p, char *path, size_t path_l,
- struct procinfo *pi)
+ char *argenv, size_t argenv_l)
{
int ret = -1;
- char *t_path;
+ char *t_path = NULL;
struct file *program;
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
- int8_t state = 0;
+ int argc, envc;
+ char **argv, **envp;
+ struct argenv *kargenv;
/* We probably want it to never be allowed to exec if it ever was _M */
if (p->state != PROC_RUNNING_S) {
set_errno(EINVAL);
return -1;
}
- /* Copy in the path. Consider putting an upper bound on path_l. */
- t_path = user_strdup_errno(p, path, path_l);
- if (!t_path)
- return -1;
- disable_irqsave(&state); /* protect cur_ctx */
+
/* Can't exec if we don't have a current_ctx to restart (if we fail). This
* isn't 100% true, but I'm okay with it. */
if (!pcpui->cur_ctx) {
- enable_irqsave(&state);
set_errno(EINVAL);
return -1;
}
/* Preemptively copy out the cur_ctx, in case we fail later (easier on
* cur_ctx if we do this now) */
- p->scp_ctx = *pcpui->cur_ctx;
+ assert(pcpui->cur_proc == pcpui->owning_proc);
+ copy_current_ctx_to(&p->scp_ctx);
+ /* Check the size of the argenv array, error out if too large. */
+ if ((argenv_l < sizeof(struct argenv)) || (argenv_l > ARG_MAX)) {
+ set_error(EINVAL, "The argenv array has an invalid size: %lu\n",
+ argenv_l);
+ return -1;
+ }
+ /* Copy the argenv array into a kernel buffer. */
+ kargenv = user_memdup_errno(p, argenv, argenv_l);
+ if (!kargenv) {
+ set_errstr("Failed to copy in the args and environment");
+ return -1;
+ }
+ /* Unpack the argenv array into more usable variables. Integrity checking
+ * done along side this as well. */
+ if (unpack_argenv(kargenv, argenv_l, &argc, &argv, &envc, &envp)) {
+ user_memdup_free(p, kargenv);
+ set_error(EINVAL, "Failed to unpack the args");
+ return -1;
+ }
+ t_path = copy_in_path(p, path, path_l);
+ if (!t_path) {
+ user_memdup_free(p, kargenv);
+ return -1;
+ }
+ /* This could block: */
+ /* TODO: 9ns support */
+ program = do_file_open(t_path, O_READ, 0);
/* Clear the current_ctx. We won't be returning the 'normal' way. Even if
* we want to return with an error, we need to go back differently in case
* we succeed. This needs to be done before we could possibly block, but
* Note that we will 'hard block' if we block at all. We can't return to
* userspace and then asynchronously finish the exec later. */
clear_owning_proc(core_id());
- enable_irqsave(&state);
- /* This could block: */
- /* TODO: 9ns support */
- program = do_file_open(t_path, 0, 0);
- user_memdup_free(p, t_path);
if (!program)
goto early_error;
if (!is_valid_elf(program)) {
set_errno(ENOEXEC);
- goto early_error;
- }
- /* Set the argument stuff needed by glibc */
- if (memcpy_from_user_errno(p, p->procinfo->argp, pi->argp,
- sizeof(pi->argp)))
- goto mid_error;
- if (memcpy_from_user_errno(p, p->procinfo->argbuf, pi->argbuf,
- sizeof(pi->argbuf)))
goto mid_error;
+ }
/* This is the point of no return for the process. */
/* progname is argv0, which accounts for symlinks */
- proc_set_progname(p, p->procinfo->argbuf);
- #ifdef CONFIG_X86
- /* clear this, so the new program knows to get an LDT */
- p->procdata->ldt = 0;
- #endif
+ proc_replace_binary_path(p, t_path);
+ proc_set_progname(p, argc ? argv[0] : NULL);
+ proc_init_procdata(p);
+ p->procinfo->program_end = 0;
/* When we destroy our memory regions, accessing cur_sysc would PF */
pcpui->cur_kthread->sysc = 0;
unmap_and_destroy_vmrs(p);
/* close the CLOEXEC ones */
- close_9ns_files(p, TRUE);
- close_all_files(&p->open_files, TRUE);
+ close_fdt(&p->open_files, TRUE);
env_user_mem_free(p, 0, UMAPTOP);
- if (load_elf(p, program)) {
+ if (load_elf(p, program, argc, argv, envc, envp)) {
kref_put(&program->f_kref);
+ user_memdup_free(p, kargenv);
/* Note this is an inedible reference, but proc_destroy now returns */
proc_destroy(p);
/* We don't want to do anything else - we just need to not accidentally
* error value (errno is already set). */
kref_put(&program->f_kref);
early_error:
+ free_path(p, t_path);
finish_current_sysc(-1);
systrace_finish_trace(pcpui->cur_kthread, -1);
success:
+ user_memdup_free(p, kargenv);
free_sysc_str(pcpui->cur_kthread);
/* Here's how we restart the new (on success) or old (on failure) proc: */
spin_lock(&p->proc_lock);
- __unmap_vcore(p, 0); /* VC# keep in sync with proc_run_s */
+ __seq_start_write(&p->procinfo->coremap_seqctr);
+ __unmap_vcore(p, 0);
+ __seq_end_write(&p->procinfo->coremap_seqctr);
__proc_set_state(p, PROC_WAITING); /* fake a yield */
spin_unlock(&p->proc_lock);
proc_wakeup(p);
static pid_t try_wait(struct proc *parent, struct proc *child, int *ret_status,
int options)
{
- if (child->state == PROC_DYING) {
+ if (proc_is_dying(child)) {
/* Disown returns -1 if it's already been disowned or we should o/w
* abort. This can happen if we have concurrent waiters, both with
* pointers to the child (only one should reap). Note that if we don't
/* If we're dying, then we don't need to worry about waiting. We don't
* do this yet, but we'll need this outlet when we deal with orphaned
* children and having init inherit them. */
- if (parent->state == PROC_DYING)
+ if (proc_is_dying(parent))
goto out_unlock;
/* Any child can wake us up, but we check for the particular child we
* care about */
while (!retval) {
cpu_relax();
cv_wait(&parent->child_wait);
- if (parent->state == PROC_DYING)
+ if (proc_is_dying(parent))
goto out_unlock;
/* Any child can wake us up from the CV. This is a linear try_wait
* scan. If we have a lot of children, we could optimize this. */
pid_t retval = 0;
int ret_status = 0;
+ sysc_save_str("waitpid on %d", pid);
/* -1 is the signal for 'any child' */
if (pid == -1) {
retval = wait_any(parent, &ret_status, options);
}
static ssize_t sys_shared_page_alloc(env_t* p1,
- void**DANGEROUS _addr, pid_t p2_id,
+ void **_addr, pid_t p2_id,
 int p1_flags, int p2_flags
 )
{
+ /* Unimplemented stub: shared-page allocation is not supported and always
+ * fails. The patch only drops the deprecated DANGEROUS annotation from
+ * the signature; behavior is unchanged. */
 return -1;
}
-static int sys_shared_page_free(env_t* p1, void*DANGEROUS addr, pid_t p2)
+static int sys_shared_page_free(env_t* p1, void *addr, pid_t p2)
{
+ /* Unimplemented stub: shared pages are not supported; always fails. */
 return -1;
}
return prov_resource(0, res_type, res_val);
/* debugging interface */
if (target_pid == -1)
- print_prov_map();
+ print_coreprov_map();
set_errno(ESRCH);
return -1;
}
return 0;
}
+/* Copies the event message u_msg in from userspace and posts it to the
+ * process's event queue ev_q, targeted at vcoreid. Returns 0 on success, or
+ * -1 with errno set to EINVAL if the message could not be copied in. */
+static int sys_send_event(struct proc *p, struct event_queue *ev_q,
+ struct event_msg *u_msg, uint32_t vcoreid)
+{
+ struct event_msg local_msg = {0};
+
+ /* Copy the user's message into a kernel buffer first; we never hand user
+ * pointers directly to the event code. */
+ if (memcpy_from_user(p, &local_msg, u_msg, sizeof(struct event_msg))) {
+ set_errno(EINVAL);
+ return -1;
+ }
+ send_event(p, ev_q, &local_msg, vcoreid);
+ return 0;
+}
+
/* Puts the calling core into vcore context, if it wasn't already, via a
* self-IPI / active notification. Barring any weird unmappings, we just send
* ourselves a __notify. */
* is trying to halt. The core need not abort the halt for notif_pending for
* the vcore, only for a __notify or other RKM. Anyone setting notif_pending
* should then attempt to __notify (o/w it's probably a bug). */
-static int sys_halt_core(struct proc *p, unsigned int usec)
+static int sys_halt_core(struct proc *p, unsigned long usec)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
struct preempt_data *vcpd;
return retval;
}
+/* Assists the user/2LS by atomically running *ctx and leaving vcore context.
+ * Normally, the user can do this themselves, but x86 VM contexts need kernel
+ * support. The caller ought to be in vcore context, and if a notif is pending,
+ * then the calling vcore will restart in a fresh VC ctx (as if it was notified
+ * or did a sys_vc_entry).
+ *
+ * Note that this will set the TLS too, which is part of the context. Parlib's
+ * pop_user_ctx currently does *not* do this, since the TLS is managed
+ * separately. If you want to use this syscall for testing, you'll need to 0
+ * out fsbase and conditionally write_msr in proc_pop_ctx(). */
+static int sys_pop_ctx(struct proc *p, struct user_context *ctx)
+{
+ int pcoreid = core_id();
+ struct per_cpu_info *pcpui = &per_cpu_info[pcoreid];
+ int vcoreid = pcpui->owning_vcoreid;
+ struct preempt_data *vcpd = &p->procdata->vcore_preempt_data[vcoreid];
+
+ /* With change_to, there's a bunch of concerns about changing the vcore map,
+ * since the kernel may have already locked and sent preempts, deaths, etc.
+ *
+ * In this case, we don't care as much. Other than notif_pending and
+ * notif_disabled, it's more like we're just changing a few registers in
+ * cur_ctx. We can safely order-after any kernel messages or other changes,
+ * as if the user had done all of the changes we'll make and then did a
+ * no-op syscall.
+ *
+ * Since we are mucking with current_ctx, it is important that we don't
+ * block before or during this syscall. */
+ /* NOTE(review): arch_finalize_ctx presumably materializes any lazily-saved
+ * arch state into cur_ctx before we overwrite it -- confirm per arch. */
+ arch_finalize_ctx(pcpui->cur_ctx);
+ if (copy_from_user(pcpui->cur_ctx, ctx, sizeof(struct user_context))) {
+ /* The 2LS isn't really in a position to handle errors. At the very
+ * least, we can print something and give them a fresh vc ctx. */
+ printk("[kernel] unable to copy user_ctx, 2LS bug\n");
+ memset(pcpui->cur_ctx, 0, sizeof(struct user_context));
+ proc_init_ctx(pcpui->cur_ctx, vcoreid, vcpd->vcore_entry,
+ vcpd->vcore_stack, vcpd->vcore_tls_desc);
+ return -1;
+ }
+ /* Scrub the user-supplied context of anything userspace may not set
+ * (presumably privileged arch state; see proc_secure_ctx). */
+ proc_secure_ctx(pcpui->cur_ctx);
+ /* The caller leaves vcore context no matter what. We'll put them back in
+ * if they missed a message. */
+ vcpd->notif_disabled = FALSE;
+ wrmb(); /* order disabled write before pending read */
+ if (vcpd->notif_pending)
+ send_kernel_message(pcoreid, __notify, (long)p, 0, 0, KMSG_ROUTINE);
+ return 0;
+}
+
+/* Adds nr_more_gpcs guest pcores to p's VMM, initialized from the user array
+ * gpcis. Lazily initializes the VMM struct on first use. Returns
+ * nr_more_gpcs on success, or -1 on error (errno/errstr set via the
+ * waserror/error path). */
+static int sys_vmm_add_gpcs(struct proc *p, unsigned int nr_more_gpcs,
+ struct vmm_gpcore_init *gpcis)
+{
+ ERRSTACK(1);
+ struct vmm *vmm = &p->vmm;
+
+ /* qlock serializes VMM setup/modification against concurrent vmm_ctls. */
+ qlock(&vmm->qlock);
+ if (waserror()) {
+ /* init/add threw an error; unwind the lock and fail. */
+ qunlock(&vmm->qlock);
+ poperror();
+ return -1;
+ }
+ __vmm_struct_init(p);
+ __vmm_add_gpcs(p, nr_more_gpcs, gpcis);
+ qunlock(&vmm->qlock);
+ poperror();
+ return nr_more_gpcs;
+}
+
+/* Thin syscall wrapper: pokes p's guest pcore guest_pcoreid (see
+ * vmm_poke_guest for semantics and the return value contract). */
+static int sys_vmm_poke_guest(struct proc *p, int guest_pcoreid)
+{
+ return vmm_poke_guest(p, guest_pcoreid);
+}
+
+/* Gets/sets VMM controls for the process. cmd selects the operation
+ * (VMM_CTL_{GET,SET}_{EXITS,FLAGS}); arg1 carries the new value for setters.
+ * arg2..arg4 are currently unused. Returns the requested value for getters,
+ * 0 for setters, or -1 with errno/errstr set. */
+static int sys_vmm_ctl(struct proc *p, int cmd, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4)
+{
+ ERRSTACK(1);
+ int ret;
+ struct vmm *vmm = &p->vmm;
+
+ /* Protects against concurrent setters and for gets that are not atomic
+ * reads (say, multiple exec ctls). */
+ qlock(&vmm->qlock);
+ if (waserror()) {
+ qunlock(&vmm->qlock);
+ poperror();
+ return -1;
+ }
+ __vmm_struct_init(p);
+ switch (cmd) {
+ case VMM_CTL_GET_EXITS:
+ /* Exit controls are Intel(VMX)-only for now. */
+ if (vmm->amd)
+ error(ENOTSUP, "AMD VMMs unsupported");
+ ret = vmx_ctl_get_exits(&vmm->vmx);
+ break;
+ case VMM_CTL_SET_EXITS:
+ if (arg1 & ~VMM_CTL_ALL_EXITS)
+ error(EINVAL, "Bad vmm_ctl_exits %x (%x)", arg1,
+ VMM_CTL_ALL_EXITS);
+ if (vmm->amd)
+ error(ENOTSUP, "AMD VMMs unsupported");
+ ret = vmx_ctl_set_exits(&vmm->vmx, arg1);
+ break;
+ case VMM_CTL_GET_FLAGS:
+ ret = vmm->flags;
+ break;
+ case VMM_CTL_SET_FLAGS:
+ if (arg1 & ~VMM_CTL_ALL_FLAGS)
+ error(EINVAL, "Bad vmm_ctl flags. Got 0x%lx, allowed 0x%lx\n",
+ arg1, VMM_CTL_ALL_FLAGS);
+ vmm->flags = arg1;
+ ret = 0;
+ break;
+ default:
+ error(EINVAL, "Bad vmm_ctl cmd %d", cmd);
+ }
+ qunlock(&vmm->qlock);
+ poperror();
+ return ret;
+}
+
/* Pokes the ksched for the given resource for target_pid. If the target pid
* == 0, we just poke for the calling process. The common case is poking for
- * self, so we avoid the lookup.
+ * self, so we avoid the lookup.
*
* Not sure if you could harm someone via asking the kernel to look at them, so
* we'll do a 'controls' check for now. In the future, we might have something
return populate_va(p, ROUNDDOWN(va, PGSIZE), nr_pgs);
}
-static intreg_t sys_read(struct proc *p, int fd, void *buf, int len)
+static intreg_t sys_read(struct proc *p, int fd, void *buf, size_t len)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
- struct systrace_record *t = pcpui->cur_kthread->trace;
ssize_t ret;
struct file *file = get_file_from_fd(&p->open_files, fd);
sysc_save_str("read on fd %d", fd);
/* plan9, should also handle errors (EBADF) */
ret = sysread(fd, buf, len);
}
-
- if ((ret > 0) && t) {
- t->datalen = MIN(sizeof(t->data), ret);
- memmove(t->data, buf, t->datalen);
- }
-
return ret;
}
-static intreg_t sys_write(struct proc *p, int fd, const void *buf, int len)
+static intreg_t sys_write(struct proc *p, int fd, const void *buf, size_t len)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
- struct systrace_record *t = pcpui->cur_kthread->trace;
ssize_t ret;
struct file *file = get_file_from_fd(&p->open_files, fd);
+
sysc_save_str("write on fd %d", fd);
/* VFS */
if (file) {
/* plan9, should also handle errors */
ret = syswrite(fd, (void*)buf, len);
}
-
- if (t) {
- t->datalen = MIN(sizeof(t->data), ret);
- memmove(t->data, buf, t->datalen);
- }
return ret;
-
}
-/* Checks args/reads in the path, opens the file, and inserts it into the
- * process's open file list. */
-static intreg_t sys_open(struct proc *p, const char *path, size_t path_l,
- int oflag, int mode)
+/* Checks args/reads in the path, opens the file (relative to fromfd if the path
+ * is not absolute), and inserts it into the process's open file list. */
+static intreg_t sys_openat(struct proc *p, int fromfd, const char *path,
+ size_t path_l, int oflag, int mode)
{
- struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
- struct systrace_record *t = pcpui->cur_kthread->trace;
int fd = -1;
- struct file *file;
+ struct file *file = 0;
+ char *t_path;
printd("File %s Open attempt oflag %x mode %x\n", path, oflag, mode);
- char *t_path = user_strdup_errno(p, path, path_l);
- if (!t_path)
+ if ((oflag & O_PATH) && (oflag & O_ACCMODE)) {
+ set_error(EINVAL, "Cannot open O_PATH with any I/O perms (O%o)", oflag);
return -1;
- if (t) {
- t->datalen = MIN(sizeof(t->data), path_l);
- memmove(t->data, t_path, path_l);
}
-
- /* Make sure only one of O_RDONLY, O_WRONLY, O_RDWR is specified in flag */
- if (((oflag & (O_RDONLY | O_WRONLY | O_RDWR)) != O_RDONLY) &&
- ((oflag & (O_RDONLY | O_WRONLY | O_RDWR)) != O_WRONLY) &&
- ((oflag & (O_RDONLY | O_WRONLY | O_RDWR)) != O_RDWR)) {
- set_errno(EINVAL);
- user_memdup_free(p, t_path);
+ t_path = copy_in_path(p, path, path_l);
+ if (!t_path)
return -1;
- }
-
- sysc_save_str("open %s", t_path);
+ sysc_save_str("open %s at fd %d", t_path, fromfd);
mode &= ~p->fs_env.umask;
- file = do_file_open(t_path, oflag, mode);
- /* VFS */
+ /* Only check the VFS for legacy opens. It doesn't support openat. Actual
+ * openats won't check here, and file == 0. */
+ if ((t_path[0] == '/') || (fromfd == AT_FDCWD))
+ file = do_file_open(t_path, oflag, mode);
+ else
+ set_errno(ENOENT); /* was not in the VFS. */
if (file) {
+ /* VFS lookup succeeded */
/* stores the ref to file */
fd = insert_file(&p->open_files, file, 0, FALSE, oflag & O_CLOEXEC);
kref_put(&file->f_kref); /* drop our ref */
if (fd < 0)
warn("File insertion failed");
} else if (get_errno() == ENOENT) {
+ /* VFS failed due to ENOENT. Other errors don't fall back to 9ns */
unset_errno(); /* Go can't handle extra errnos */
- fd = sysopen(t_path, oflag);
+ fd = sysopenat(fromfd, t_path, oflag);
/* successful lookup with CREATE and EXCL is an error */
if (fd != -1) {
if ((oflag & O_CREATE) && (oflag & O_EXCL)) {
set_errno(EEXIST);
sysclose(fd);
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
return -1;
}
} else {
}
}
}
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
printd("File %s Open, fd=%d\n", path, fd);
return fd;
}
}
/* 9ns, should also handle errors (bad FD, etc) */
retval = sysclose(fd);
- if (retval < 0) {
- /* no one checks their retvals. a double close will cause problems. */
- printk("[kernel] sys_close failed: proc %d fd %d. Check your rets.\n",
- p->pid, fd);
- }
return retval;
}
-/* kept around til we remove the last ufe */
-#define ufe(which,a0,a1,a2,a3) \
- frontend_syscall_errno(p,APPSERVER_SYSCALL_##which,\
- (int)(a0),(int)(a1),(int)(a2),(int)(a3))
-
static intreg_t sys_fstat(struct proc *p, int fd, struct kstat *u_stat)
{
struct kstat *kbuf;
{
struct kstat *kbuf;
struct dentry *path_d;
- char *t_path = user_strdup_errno(p, path, path_l);
+ char *t_path = copy_in_path(p, path, path_l);
int retval = 0;
if (!t_path)
return -1;
out_with_kbuf:
kfree(kbuf);
out_with_path:
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
return retval;
}
return stat_helper(p, path, path_l, u_stat, 0);
}
-intreg_t sys_fcntl(struct proc *p, int fd, int cmd, int arg)
+intreg_t sys_fcntl(struct proc *p, int fd, int cmd, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3, unsigned long arg4)
{
int retval = 0;
int newfd;
/* 9ns hack */
switch (cmd) {
case (F_DUPFD):
- return sysdup(fd, -1);
+ return sysdup(fd);
case (F_GETFD):
case (F_SETFD):
+ case (F_SYNC):
+ case (F_ADVISE):
+ /* TODO: 9ns versions */
return 0;
case (F_GETFL):
return fd_getfl(fd);
case (F_SETFL):
- return fd_setfl(fd, arg);
+ return fd_setfl(fd, arg1);
default:
warn("Unsupported fcntl cmd %d\n", cmd);
}
/* TODO: these are racy */
switch (cmd) {
case (F_DUPFD):
- retval = insert_file(&p->open_files, file, arg, FALSE, FALSE);
+ retval = insert_file(&p->open_files, file, arg1, FALSE, FALSE);
if (retval < 0) {
set_errno(-retval);
retval = -1;
/* I'm considering not supporting this at all. They must do it at
* open time or fix their buggy/racy code. */
spin_lock(&p->open_files.lock);
- if (arg & FD_CLOEXEC)
+ if (arg1 & FD_CLOEXEC)
p->open_files.fd[fd].fd_flags |= FD_CLOEXEC;
retval = p->open_files.fd[fd].fd_flags;
spin_unlock(&p->open_files.lock);
break;
case (F_SETFL):
/* only allowed to set certain flags. */
- arg &= O_FCNTL_FLAGS;
- file->f_flags = (file->f_flags & ~O_FCNTL_FLAGS) | arg;
+ arg1 &= O_FCNTL_SET_FLAGS;
+ file->f_flags = (file->f_flags & ~O_FCNTL_SET_FLAGS) | arg1;
+ break;
+ case (F_SYNC):
+ /* TODO (if we keep the VFS) */
+ retval = 0;
+ break;
+ case (F_ADVISE):
+ /* TODO (if we keep the VFS)*/
+ retval = 0;
break;
default:
warn("Unsupported fcntl cmd %d\n", cmd);
int mode)
{
int retval;
- char *t_path = user_strdup_errno(p, path, path_l);
+ char *t_path = copy_in_path(p, path, path_l);
if (!t_path)
return -1;
/* TODO: 9ns support */
retval = do_access(t_path, mode);
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
printd("Access for path: %s retval: %d\n", path, retval);
if (retval < 0) {
set_errno(-retval);
ret = file->f_op->llseek(file, tempoff, &retoff, whence);
kref_put(&file->f_kref);
} else {
- /* won't return here if error ... */
- ret = sysseek(fd, tempoff, whence);
- retoff = ret;
- ret = 0;
+ retoff = sysseek(fd, tempoff, whence);
+ ret = (retoff < 0);
}
if (ret)
char *new_path, size_t new_l)
{
int ret;
- char *t_oldpath = user_strdup_errno(p, old_path, old_l);
+ char *t_oldpath = copy_in_path(p, old_path, old_l);
if (t_oldpath == NULL)
return -1;
- char *t_newpath = user_strdup_errno(p, new_path, new_l);
+ char *t_newpath = copy_in_path(p, new_path, new_l);
if (t_newpath == NULL) {
- user_memdup_free(p, t_oldpath);
+ free_path(p, t_oldpath);
return -1;
}
ret = do_link(t_oldpath, t_newpath);
- user_memdup_free(p, t_oldpath);
- user_memdup_free(p, t_newpath);
+ free_path(p, t_oldpath);
+ free_path(p, t_newpath);
return ret;
}
intreg_t sys_unlink(struct proc *p, const char *path, size_t path_l)
{
int retval;
- char *t_path = user_strdup_errno(p, path, path_l);
+ char *t_path = copy_in_path(p, path, path_l);
if (!t_path)
return -1;
retval = do_unlink(t_path);
unset_errno();
retval = sysremove(t_path);
}
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
return retval;
}
char *new_path, size_t new_l)
{
int ret;
- char *t_oldpath = user_strdup_errno(p, old_path, old_l);
+ char *t_oldpath = copy_in_path(p, old_path, old_l);
if (t_oldpath == NULL)
return -1;
- char *t_newpath = user_strdup_errno(p, new_path, new_l);
+ char *t_newpath = copy_in_path(p, new_path, new_l);
if (t_newpath == NULL) {
- user_memdup_free(p, t_oldpath);
+ free_path(p, t_oldpath);
return -1;
}
ret = do_symlink(t_newpath, t_oldpath, S_IRWXU | S_IRWXG | S_IRWXO);
- user_memdup_free(p, t_oldpath);
- user_memdup_free(p, t_newpath);
+ free_path(p, t_oldpath);
+ free_path(p, t_newpath);
return ret;
}
ssize_t copy_amt;
int ret = -1;
struct dentry *path_d;
- char *t_path = user_strdup_errno(p, path, path_l);
+ char *t_path = copy_in_path(p, path, path_l);
if (t_path == NULL)
return -1;
/* TODO: 9ns support */
path_d = lookup_dentry(t_path, 0);
if (!path_d){
int n = 2048;
- buf = kmalloc(n*2, KMALLOC_WAIT);
+ buf = kmalloc(n*2, MEM_WAIT);
struct dir *d = (void *)&buf[n];
/* try 9ns. */
if (sysstat(t_path, buf, n) > 0) {
} else
symname = path_d->d_inode->i_op->readlink(path_d);
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
if (symname){
copy_amt = strnlen(symname, buf_l - 1) + 1;
- if (! memcpy_to_user_errno(p, u_buf, symname, copy_amt))
+ if (!memcpy_to_user_errno(p, u_buf, symname, copy_amt))
ret = copy_amt - 1;
}
if (path_d)
return ret;
}
-static intreg_t sys_chdir(struct proc *p, pid_t pid, const char *path, size_t path_l)
+static intreg_t sys_chdir(struct proc *p, pid_t pid, const char *path,
+ size_t path_l)
{
int retval;
char *t_path;
struct proc *target = get_controllable_proc(p, pid);
if (!target)
return -1;
- t_path = user_strdup_errno(p, path, path_l);
+ t_path = copy_in_path(p, path, path_l);
if (!t_path) {
proc_decref(target);
return -1;
}
/* TODO: 9ns support */
retval = do_chdir(&target->fs_env, t_path);
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
proc_decref(target);
return retval;
}
{
int retval = 0;
char *kfree_this;
- char *k_cwd = do_getcwd(&p->fs_env, &kfree_this, cwd_l);
+ char *k_cwd;
+ k_cwd = do_getcwd(&p->fs_env, &kfree_this, cwd_l);
if (!k_cwd)
return -1; /* errno set by do_getcwd */
- if (memcpy_to_user_errno(p, u_cwd, k_cwd, strnlen(k_cwd, cwd_l - 1) + 1))
+ if (strlen(k_cwd) + 1 > cwd_l) {
+ set_error(ERANGE, "getcwd buf too small, needed %d", strlen(k_cwd) + 1);
retval = -1;
- retval = strnlen(k_cwd, cwd_l - 1);
+ goto out;
+ }
+ if (memcpy_to_user_errno(p, u_cwd, k_cwd, strlen(k_cwd) + 1))
+ retval = -1;
+out:
kfree(kfree_this);
return retval;
}
intreg_t sys_mkdir(struct proc *p, const char *path, size_t path_l, int mode)
{
int retval;
- char *t_path = user_strdup_errno(p, path, path_l);
+ char *t_path = copy_in_path(p, path, path_l);
if (!t_path)
return -1;
mode &= S_PMASK;
static_assert(!(S_PMASK & DMDIR));
retval = syscreate(t_path, O_RDWR, DMDIR | mode);
}
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
return retval;
}
intreg_t sys_rmdir(struct proc *p, const char *path, size_t path_l)
{
int retval;
- char *t_path = user_strdup_errno(p, path, path_l);
+ char *t_path = copy_in_path(p, path, path_l);
if (!t_path)
return -1;
/* TODO: 9ns support */
retval = do_rmdir(t_path);
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
return retval;
}
-intreg_t sys_pipe(struct proc *p, int *u_pipefd, int flags)
-{
- int pipefd[2] = {0};
- int retval = syspipe(pipefd);
-
- if (retval)
- return -1;
- if (memcpy_to_user_errno(p, u_pipefd, pipefd, sizeof(pipefd))) {
- sysclose(pipefd[0]);
- sysclose(pipefd[1]);
- set_errno(EFAULT);
- return -1;
- }
- return 0;
-}
-
-intreg_t sys_gettimeofday(struct proc *p, int *buf)
-{
- static spinlock_t gtod_lock = SPINLOCK_INITIALIZER;
- static int t0 = 0;
-
- spin_lock(>od_lock);
- if(t0 == 0)
-
-#if (defined CONFIG_APPSERVER)
- t0 = ufe(time,0,0,0,0);
-#else
- // Nanwan's birthday, bitches!!
- t0 = 1242129600;
-#endif
- spin_unlock(>od_lock);
-
- long long dt = read_tsc();
- /* TODO: This probably wants its own function, using a struct timeval */
- long kbuf[2] = {t0+dt/system_timing.tsc_freq,
- (dt%system_timing.tsc_freq)*1000000/system_timing.tsc_freq};
-
- return memcpy_to_user_errno(p,buf,kbuf,sizeof(kbuf));
-}
-
intreg_t sys_tcgetattr(struct proc *p, int fd, void *termios_p)
{
int retval = 0;
{
int ret;
- char *t_srcpath = user_strdup_errno(p, src_path, src_l);
+ char *t_srcpath = copy_in_path(p, src_path, src_l);
if (t_srcpath == NULL) {
printd("srcpath dup failed ptr %p size %d\n", src_path, src_l);
return -1;
}
- char *t_ontopath = user_strdup_errno(p, onto_path, onto_l);
+ char *t_ontopath = copy_in_path(p, onto_path, onto_l);
if (t_ontopath == NULL) {
- user_memdup_free(p, t_srcpath);
+ free_path(p, t_srcpath);
printd("ontopath dup failed ptr %p size %d\n", onto_path, onto_l);
return -1;
}
printd("sys_nbind: %s -> %s flag %d\n", t_srcpath, t_ontopath, flag);
ret = sysbind(t_srcpath, t_ontopath, flag);
- user_memdup_free(p, t_srcpath);
- user_memdup_free(p, t_ontopath);
+ free_path(p, t_srcpath);
+ free_path(p, t_ontopath);
return ret;
}
int afd;
afd = -1;
- char *t_ontopath = user_strdup_errno(p, onto_path, onto_l);
+ char *t_ontopath = copy_in_path(p, onto_path, onto_l);
if (t_ontopath == NULL)
return -1;
- ret = sysmount(fd, afd, t_ontopath, flag, /* spec or auth */"");
- user_memdup_free(p, t_ontopath);
+ ret = sysmount(fd, afd, t_ontopath, flag, /* spec or auth */"/");
+ free_path(p, t_ontopath);
return ret;
}
-/* int mount(int fd, int afd, char* old, int flag, char* aname); */
-intreg_t sys_nunmount(struct proc *p, char *name, int name_l, char *old_path, int old_l)
+/* Unmount undoes the operation of a bind or mount. Check out
+ * http://plan9.bell-labs.com/magic/man2html/1/bind . Though our mount takes an
+ * FD, not servename (aka src_path), so it's not quite the same.
+ *
+ * To translate between Plan 9 and Akaros, old -> onto_path. new -> src_path.
+ *
+ * For unmount, src_path / new is optional. If set, we only unmount the
+ * bindmount that came from src_path. */
+intreg_t sys_nunmount(struct proc *p, char *src_path, int src_l,
+ char *onto_path, int onto_l)
{
int ret;
- char *t_oldpath = user_strdup_errno(p, old_path, old_l);
- if (t_oldpath == NULL)
- return -1;
- char *t_name = user_strdup_errno(p, name, name_l);
- if (t_name == NULL) {
- user_memdup_free(p, t_oldpath);
+ char *t_ontopath, *t_srcpath;
+ t_ontopath = copy_in_path(p, onto_path, onto_l);
+ if (t_ontopath == NULL)
return -1;
+ if (src_path) {
+ t_srcpath = copy_in_path(p, src_path, src_l);
+ if (t_srcpath == NULL) {
+ free_path(p, t_ontopath);
+ return -1;
+ }
+ } else {
+ t_srcpath = 0;
}
- ret = sysunmount(t_name, t_oldpath);
- printd("go do it\n");
- user_memdup_free(p, t_oldpath);
- user_memdup_free(p, t_name);
+ ret = sysunmount(t_srcpath, t_ontopath);
+ free_path(p, t_ontopath);
+ free_path(p, t_srcpath); /* you can free a null path */
return ret;
}
intreg_t sys_fd2path(struct proc *p, int fd, void *u_buf, size_t len)
{
- int ret;
+ int ret = 0;
struct chan *ch;
ERRSTACK(1);
/* UMEM: Check the range, can PF later and kill if the page isn't present */
poperror();
return -1;
}
- ch = fdtochan(current->fgrp, fd, -1, FALSE, TRUE);
- ret = snprintf(u_buf, len, "%s", channame(ch));
+ ch = fdtochan(¤t->open_files, fd, -1, FALSE, TRUE);
+ if (snprintf(u_buf, len, "%s", channame(ch)) >= len) {
+ set_error(ERANGE, "fd2path buf too small, needed %d", ret);
+ ret = -1;
+ }
cclose(ch);
poperror();
return ret;
int m_sz;
int retval = 0;
- dir = kzmalloc(sizeof(struct dir) + stat_sz, KMALLOC_WAIT);
+ dir = kzmalloc(sizeof(struct dir) + stat_sz, MEM_WAIT);
m_sz = convM2D(stat_m, stat_sz, &dir[0], (char*)&dir[1]);
if (m_sz != stat_sz) {
- set_errstr(Eshortstat);
- set_errno(EINVAL);
+ set_error(EINVAL, ERROR_FIXME);
kfree(dir);
return -1;
}
uint8_t *stat_m, size_t stat_sz, int flags)
{
int retval = 0;
- char *t_path = user_strdup_errno(p, path, path_l);
+ char *t_path = copy_in_path(p, path, path_l);
struct file *file;
if (!t_path)
return -1;
retval = syswstat(t_path, stat_m, stat_sz);
if (retval == stat_sz) {
- user_memdup_free(p, t_path);
+ free_path(p, t_path);
return stat_sz;
}
/* 9ns failed, we'll need to check the VFS */
- file = do_file_open(t_path, 0, 0);
- user_memdup_free(p, t_path);
+ file = do_file_open(t_path, O_READ, 0);
+ free_path(p, t_path);
if (!file)
return -1;
retval = vfs_wstat(file, stat_m, stat_sz, flags);
char *new_path, size_t new_path_l)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
- struct systrace_record *t = pcpui->cur_kthread->trace;
ERRSTACK(1);
int mountpointlen = 0;
- char *from_path = user_strdup_errno(p, old_path, old_path_l);
- char *to_path = user_strdup_errno(p, new_path, new_path_l);
+ char *from_path = copy_in_path(p, old_path, old_path_l);
+ char *to_path = copy_in_path(p, new_path, new_path_l);
struct chan *oldchan = 0, *newchan = NULL;
int retval = -1;
if ((!from_path) || (!to_path))
return -1;
printd("sys_rename :%s: to :%s: : ", from_path, to_path);
- if (t) {
- t->datalen = snprintf((char *)t->data, sizeof(t->data), "Rename :%s: to :%s:", from_path, to_path);
- }
/* we need a fid for the wstat. */
/* TODO: maybe wrap the 9ns stuff better. sysrename maybe? */
poperror();
if (!oldchan) {
retval = do_rename(from_path, to_path);
- user_memdup_free(p, from_path);
- user_memdup_free(p, to_path);
+ free_path(p, from_path);
+ free_path(p, to_path);
return retval;
}
printd("Newchan: %C\n", newchan);
printd("Newchan: mchan %C\n", newchan->mchan);
- if ((newchan->dev != oldchan->dev) ||
+ if ((newchan->dev != oldchan->dev) ||
(newchan->type != oldchan->type)) {
printd("Old chan and new chan do not match\n");
set_errno(ENODEV);
}
mlen = convD2M(&dir, mbuf, sizeof(mbuf));
- if (! mlen) {
+ if (!mlen) {
printk("convD2M failed\n");
set_errno(EINVAL);
goto done;
};
printk("syswstat returns %d\n", retval);
-done:
- user_memdup_free(p, from_path);
- user_memdup_free(p, to_path);
+done:
+ free_path(p, from_path);
+ free_path(p, to_path);
cclose(oldchan);
cclose(newchan);
return retval;
}
+/* Careful: if an FD is busy, we don't close the old object, it just fails */
static intreg_t sys_dup_fds_to(struct proc *p, unsigned int pid,
struct childfdmap *map, unsigned int nentries)
{
return ret;
}
+/* 0 on success, anything else is an error, with errno/errstr set */
+/* Helper for sys_tap_fds: dispatches a single FD-tap request to the add or
+ * remove handler based on req->cmd. */
+static int handle_tap_req(struct proc *p, struct fd_tap_req *req)
+{
+ switch (req->cmd) {
+ case (FDTAP_CMD_ADD):
+ return add_fd_tap(p, req);
+ case (FDTAP_CMD_REM):
+ return remove_fd_tap(p, req->fd);
+ default:
+ set_error(ENOSYS, "FD Tap Command %d not supported", req->cmd);
+ return -1;
+ }
+}
+
+/* Processes up to nr_reqs tap requests. If a request errors out, we stop
+ * immediately. Returns the number processed. If done != nr_reqs, check errno
+ * and errstr for the last failure, which is for tap_reqs[done]. */
+static intreg_t sys_tap_fds(struct proc *p, struct fd_tap_req *tap_reqs,
+ size_t nr_reqs)
+{
+ struct fd_tap_req *req_i = tap_reqs;
+ int done;
+ /* Validate the whole user array up front; reject with EINVAL and report
+ * zero processed. NOTE(review): the sizeof * nr_reqs product could wrap
+ * for absurd nr_reqs -- verify is_user_rwaddr rejects that case. */
+ if (!is_user_rwaddr(tap_reqs, sizeof(struct fd_tap_req) * nr_reqs)) {
+ set_errno(EINVAL);
+ return 0;
+ }
+ for (done = 0; done < nr_reqs; done++, req_i++) {
+ if (handle_tap_req(p, req_i))
+ break;
+ }
+ return done;
+}
+
/************** Syscall Invokation **************/
const struct sys_table_entry syscall_table[] = {
[SYS_null] = {(syscall_t)sys_null, "null"},
[SYS_block] = {(syscall_t)sys_block, "block"},
- [SYS_cache_buster] = {(syscall_t)sys_cache_buster, "buster"},
[SYS_cache_invalidate] = {(syscall_t)sys_cache_invalidate, "wbinv"},
[SYS_reboot] = {(syscall_t)reboot, "reboot!"},
- [SYS_cputs] = {(syscall_t)sys_cputs, "cputs"},
- [SYS_cgetc] = {(syscall_t)sys_cgetc, "cgetc"},
[SYS_getpcoreid] = {(syscall_t)sys_getpcoreid, "getpcoreid"},
[SYS_getvcoreid] = {(syscall_t)sys_getvcoreid, "getvcoreid"},
- [SYS_getpid] = {(syscall_t)sys_getpid, "getpid"},
[SYS_proc_create] = {(syscall_t)sys_proc_create, "proc_create"},
[SYS_proc_run] = {(syscall_t)sys_proc_run, "proc_run"},
[SYS_proc_destroy] = {(syscall_t)sys_proc_destroy, "proc_destroy"},
- [SYS_yield] = {(syscall_t)sys_proc_yield, "proc_yield"},
+ [SYS_proc_yield] = {(syscall_t)sys_proc_yield, "proc_yield"},
[SYS_change_vcore] = {(syscall_t)sys_change_vcore, "change_vcore"},
[SYS_fork] = {(syscall_t)sys_fork, "fork"},
[SYS_exec] = {(syscall_t)sys_exec, "exec"},
[SYS_provision] = {(syscall_t)sys_provision, "provision"},
[SYS_notify] = {(syscall_t)sys_notify, "notify"},
[SYS_self_notify] = {(syscall_t)sys_self_notify, "self_notify"},
+ [SYS_send_event] = {(syscall_t)sys_send_event, "send_event"},
[SYS_vc_entry] = {(syscall_t)sys_vc_entry, "vc_entry"},
[SYS_halt_core] = {(syscall_t)sys_halt_core, "halt_core"},
#ifdef CONFIG_ARSC_SERVER
[SYS_init_arsc] = {(syscall_t)sys_init_arsc, "init_arsc"},
#endif
[SYS_change_to_m] = {(syscall_t)sys_change_to_m, "change_to_m"},
+ [SYS_vmm_add_gpcs] = {(syscall_t)sys_vmm_add_gpcs, "vmm_add_gpcs"},
+ [SYS_vmm_poke_guest] = {(syscall_t)sys_vmm_poke_guest, "vmm_poke_guest"},
+ [SYS_vmm_ctl] = {(syscall_t)sys_vmm_ctl, "vmm_ctl"},
[SYS_poke_ksched] = {(syscall_t)sys_poke_ksched, "poke_ksched"},
[SYS_abort_sysc] = {(syscall_t)sys_abort_sysc, "abort_sysc"},
[SYS_abort_sysc_fd] = {(syscall_t)sys_abort_sysc_fd, "abort_sysc_fd"},
[SYS_populate_va] = {(syscall_t)sys_populate_va, "populate_va"},
+ [SYS_nanosleep] = {(syscall_t)sys_nanosleep, "nanosleep"},
+ [SYS_pop_ctx] = {(syscall_t)sys_pop_ctx, "pop_ctx"},
[SYS_read] = {(syscall_t)sys_read, "read"},
[SYS_write] = {(syscall_t)sys_write, "write"},
- [SYS_open] = {(syscall_t)sys_open, "open"},
+ [SYS_openat] = {(syscall_t)sys_openat, "openat"},
[SYS_close] = {(syscall_t)sys_close, "close"},
[SYS_fstat] = {(syscall_t)sys_fstat, "fstat"},
[SYS_stat] = {(syscall_t)sys_stat, "stat"},
[SYS_getcwd] = {(syscall_t)sys_getcwd, "getcwd"},
[SYS_mkdir] = {(syscall_t)sys_mkdir, "mkdir"},
[SYS_rmdir] = {(syscall_t)sys_rmdir, "rmdir"},
- [SYS_pipe] = {(syscall_t)sys_pipe, "pipe"},
- [SYS_gettimeofday] = {(syscall_t)sys_gettimeofday, "gettime"},
[SYS_tcgetattr] = {(syscall_t)sys_tcgetattr, "tcgetattr"},
[SYS_tcsetattr] = {(syscall_t)sys_tcsetattr, "tcsetattr"},
[SYS_setuid] = {(syscall_t)sys_setuid, "setuid"},
[SYS_fwstat] ={(syscall_t)sys_fwstat, "fwstat"},
[SYS_rename] ={(syscall_t)sys_rename, "rename"},
[SYS_dup_fds_to] = {(syscall_t)sys_dup_fds_to, "dup_fds_to"},
+ [SYS_tap_fds] = {(syscall_t)sys_tap_fds, "tap_fds"},
};
const int max_syscall = sizeof(syscall_table)/sizeof(syscall_table[0]);
+
/* Executes the given syscall.
*
* Note tf is passed in, which points to the tf of the context on the kernel
void run_local_syscall(struct syscall *sysc)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ struct proc *p = pcpui->cur_proc;
- assert(irq_is_enabled()); /* in case we proc destroy */
/* In lieu of pinning, we just check the sysc and will PF on the user addr
* later (if the addr was unmapped). Which is the plan for all UMEM. */
if (!is_user_rwaddr(sysc, sizeof(struct syscall))) {
}
pcpui->cur_kthread->sysc = sysc; /* let the core know which sysc it is */
systrace_start_trace(pcpui->cur_kthread, sysc);
+ pcpui = &per_cpu_info[core_id()]; /* reload again */
alloc_sysc_str(pcpui->cur_kthread);
/* syscall() does not return for exec and yield, so put any cleanup in there
* too. */
pcpui = &per_cpu_info[core_id()];
free_sysc_str(pcpui->cur_kthread);
systrace_finish_trace(pcpui->cur_kthread, sysc->retval);
+ pcpui = &per_cpu_info[core_id()]; /* reload again */
/* Some 9ns paths set errstr, but not errno. glibc will ignore errstr.
* this is somewhat hacky, since errno might get set unnecessarily */
if ((current_errstr()[0] != 0) && (!sysc->err))
sysc->err = EUNSPECIFIED;
finish_sysc(sysc, pcpui->cur_proc);
- pcpui->cur_kthread->sysc = 0; /* no longer working on sysc */
+ pcpui->cur_kthread->sysc = NULL; /* No longer working on sysc */
}
/* A process can trap and call this function, which will set up the core to
* at least one, it will run it directly. */
void prep_syscalls(struct proc *p, struct syscall *sysc, unsigned int nr_syscs)
{
- int retval;
/* Careful with pcpui here, we could have migrated */
if (!nr_syscs) {
printk("[kernel] No nr_sysc, probably a bug, user!\n");
}
}
-/* Syscall tracing */
-static void __init_systrace(void)
-{
- systrace_buffer = kmalloc(MAX_SYSTRACES*sizeof(struct systrace_record), 0);
- if (!systrace_buffer)
- panic("Unable to alloc a trace buffer\n");
- systrace_bufidx = 0;
- systrace_bufsize = MAX_SYSTRACES;
- /* Note we never free the buffer - it's around forever. Feel free to change
- * this if you want to change the size or something dynamically. */
-}
-
-/* If you call this while it is running, it will change the mode */
-void systrace_start(bool silent)
-{
- static bool init = FALSE;
- spin_lock_irqsave(&systrace_lock);
- if (!init) {
- __init_systrace();
- init = TRUE;
- }
- systrace_flags = silent ? SYSTRACE_ON : SYSTRACE_ON | SYSTRACE_LOUD;
- spin_unlock_irqsave(&systrace_lock);
-}
-
-int systrace_reg(bool all, struct proc *p)
-{
- int retval = 0;
- spin_lock_irqsave(&systrace_lock);
- if (all) {
- printk("Tracing syscalls for all processes\n");
- systrace_flags |= SYSTRACE_ALLPROC;
- retval = 0;
- } else {
- for (int i = 0; i < MAX_NUM_TRACED; i++) {
- if (!systrace_procs[i]) {
- printk("Tracing syscalls for process %d\n", p->pid);
- systrace_procs[i] = p;
- retval = 0;
- break;
- }
- }
- }
- spin_unlock_irqsave(&systrace_lock);
- return retval;
-}
-
-int systrace_trace_pid(struct proc *p)
-{
- if (systrace_reg(false, p))
- error("no more processes");
- systrace_start(true);
- return 0;
-}
-
-void systrace_stop(void)
-{
- spin_lock_irqsave(&systrace_lock);
- systrace_flags = 0;
- for (int i = 0; i < MAX_NUM_TRACED; i++)
- systrace_procs[i] = 0;
- spin_unlock_irqsave(&systrace_lock);
-}
-
-/* If you registered a process specifically, then you need to dereg it
- * specifically. Or just fully stop, which will do it for all. */
-int systrace_dereg(bool all, struct proc *p)
-{
- spin_lock_irqsave(&systrace_lock);
- if (all) {
- printk("No longer tracing syscalls for all processes.\n");
- systrace_flags &= ~SYSTRACE_ALLPROC;
- } else {
- for (int i = 0; i < MAX_NUM_TRACED; i++) {
- if (systrace_procs[i] == p) {
- systrace_procs[i] = 0;
- printk("No longer tracing syscalls for process %d\n", p->pid);
- }
- }
- }
- spin_unlock_irqsave(&systrace_lock);
- return 0;
-}
-
-/* Regardless of locking, someone could be writing into the buffer */
-void systrace_print(bool all, struct proc *p)
-{
- spin_lock_irqsave(&systrace_lock);
- /* if you want to be clever, you could make this start from the earliest
- * timestamp and loop around. Careful of concurrent writes. */
- for (int i = 0; i < systrace_bufsize; i++)
- if (systrace_buffer[i].start_timestamp)
- printk("[%16llu] Syscall %3d (%12s):(%p, %p, %p, %p, %p,"
- "%p) proc: %d core: %d vcore: %d\n",
- systrace_buffer[i].start_timestamp,
- systrace_buffer[i].syscallno,
- syscall_table[systrace_buffer[i].syscallno].name,
- systrace_buffer[i].arg0,
- systrace_buffer[i].arg1,
- systrace_buffer[i].arg2,
- systrace_buffer[i].arg3,
- systrace_buffer[i].arg4,
- systrace_buffer[i].arg5,
- systrace_buffer[i].pid,
- systrace_buffer[i].coreid,
- systrace_buffer[i].vcoreid);
- spin_unlock_irqsave(&systrace_lock);
-}
-
-void systrace_clear_buffer(void)
-{
- spin_lock_irqsave(&systrace_lock);
- memset(systrace_buffer, 0, sizeof(struct systrace_record) * MAX_SYSTRACES);
- spin_unlock_irqsave(&systrace_lock);
-}
-
bool syscall_uses_fd(struct syscall *sysc, int fd)
{
switch (sysc->num) {
void print_sysc(struct proc *p, struct syscall *sysc)
{
- struct proc *old_p = switch_to(p);
+ uintptr_t old_p = switch_to(p);
printk("SYS_%d, flags %p, a0 %p, a1 %p, a2 %p, a3 %p, a4 %p, a5 %p\n",
sysc->num, atomic_read(&sysc->flags),
sysc->arg0, sysc->arg1, sysc->arg2, sysc->arg3, sysc->arg4,