parlib: have 2LS libraries #include parlib/stdio.h
[akaros.git] / user / vmm / linuxemu.c
index 719da68..a495835 100644 (file)
@@ -5,7 +5,7 @@
 
 #include <sys/stat.h>
 #include <sys/types.h>
-#include <stdio.h>
+#include <parlib/stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <dlfcn.h>
 #include <sys/mman.h>
 #include <futex.h>
+#include <sys/epoll.h>
 
 // This is the maximum fd number we allow opened in dune
 #define DUNE_NR_FILE_DESC 100
 
+// Some defines used in linux syscalls
+#define FALLOC_FL_KEEP_SIZE 1
+#define FALLOC_FL_PUNCH_HOLE 2
+#define GRND_NONBLOCK 0x0001
+#define GRND_RANDOM 0x0002
+
 static int lemu_debug;
 
 static uth_mutex_t *lemu_logging_lock;
@@ -93,7 +100,8 @@ bool get_absolute_path_from_fd(int fd, const char *path, char **absolute_path)
        int len1 = strlen(path);
 
        if (len1 == 0) {
-               fprintf(stderr, "get_absolute_path_from_fd: suffix is empty.\n");
+               fprintf(stderr,
+                       "get_absolute_path_from_fd: suffix is empty.\n");
                return false;
        }
 
@@ -111,7 +119,8 @@ bool get_absolute_path_from_fd(int fd, const char *path, char **absolute_path)
        uth_mutex_lock(fd_table_lock);
        if (!openfd_filenames[fd]) {
                uth_mutex_unlock(fd_table_lock);
-               fprintf(stderr, "get_absolute_path_from_fd: no file open at fd.\n");
+               fprintf(stderr,
+                       "get_absolute_path_from_fd: no file open at fd.\n");
                return false;
        }
 
@@ -152,7 +161,7 @@ bool get_absolute_path_from_fd(int fd, const char *path, char **absolute_path)
 
 //Akaros open flags are different than linux
 //This function converts them
-int translate_open_flags(int flags)
+int convert_open_flags_ltoa(int flags)
 {
        int lower3bits = flags & 0x7;
        int otherstuff = flags & ~(0x7);
@@ -167,13 +176,34 @@ int translate_open_flags(int flags)
        case 2:
                otherstuff |= O_RDWR;
                break;
-       case 3:
-               otherstuff |= O_ACCMODE;
+       default:
+               // TODO(ganshun): We panic here for now if they are trying
+               // behavior we do not expect
+               panic("linuxemu, convert_open_flags_ltoa: unknown open flags provided\n");
+               break;
+       }
+       return otherstuff;
+}
+
+int convert_open_flags_atol(int flags)
+{
+       int lower3bits = flags & 0x7;
+       int otherstuff = flags & ~(0x7);
+
+       switch (lower3bits) {
+       case O_RDONLY:
+               otherstuff |= 0;
+               break;
+       case O_WRONLY:
+               otherstuff |= 1;
+               break;
+       case O_RDWR:
+               otherstuff |= 2;
                break;
        default:
-               // TODO(ganshun): We panic here for now if they are trying behavior we
-               // do not expect
-               panic("linuxemu, translate_open_flags: unknown open flags provided\n");
+               // TODO(ganshun): We panic here for now if they are trying
+               // behavior we do not expect
+               panic("linuxemu, convert_open_flags_atol: unknown open flags provided\n");
                break;
        }
        return otherstuff;
@@ -217,6 +247,27 @@ bool update_fd_map(int fd, const char *path)
        return true;
 }
 
+// Copy the fields of an Akaros struct stat into the Linux amd64 stat layout,
+// casting each integer field to an explicit fixed width on the way.
+void convert_stat_akaros_to_linux(struct stat *si_akaros,
+                                  struct linux_stat_amd64 *si)
+{
+       si->st_dev =  (uint64_t) si_akaros->st_dev;
+       si->st_ino = (uint64_t) si_akaros->st_ino;
+       si->st_mode = (uint32_t) si_akaros->st_mode;
+       si->st_nlink = (uint64_t) si_akaros->st_nlink;
+       si->st_uid = (uint32_t) si_akaros->st_uid;
+       si->st_gid = (uint32_t) si_akaros->st_gid;
+       si->st_rdev = (uint64_t) si_akaros->st_rdev;
+       si->st_size = (int64_t) si_akaros->st_size;
+       si->st_blksize = (int64_t) si_akaros->st_blksize;
+       si->st_blocks = (int64_t) si_akaros->st_blocks;
+       // Works for now (might change): akaros timespec must be 2x int64_t
+       si->st_atim = si_akaros->st_atim;
+       si->st_mtim = si_akaros->st_mtim;
+       si->st_ctim = si_akaros->st_ctim;
+}
+
 /////////////////////////////////////
 // BEGIN DUNE SYSCALL IMPLEMENTATIONS
 /////////////////////////////////////
@@ -253,6 +304,9 @@ bool dune_sys_fcntl(struct vm_trapframe *tf)
                          "ERROR %d\n", err);
                tf->tf_rax = -err;
        } else {
+               // TODO(ganshun): fix fcntl SETFL, ak flags are different.
+               if (tf->tf_rsi == 3)
+                       retval = convert_open_flags_atol(retval);
                lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
                          "SUCCESS %d\n", retval);
                tf->tf_rax = retval;
@@ -284,7 +338,8 @@ bool dune_sys_pread64(struct vm_trapframe *tf)
 
 bool dune_sys_read(struct vm_trapframe *tf)
 {
-       ssize_t retval = read(tf->tf_rdi, (void*) tf->tf_rsi, (size_t) tf->tf_rdx);
+       ssize_t retval = read(tf->tf_rdi, (void*) tf->tf_rsi,
+                             (size_t) tf->tf_rdx);
        int err = errno;
 
        if (retval == -1) {
@@ -344,7 +399,8 @@ bool dune_sys_getpid(struct vm_trapframe *tf)
        // Getpid always suceeds
        int retval = getpid();
 
-       lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false, "SUCCESS %d\n", retval);
+       lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false, "SUCCESS %d\n",
+                 retval);
        tf->tf_rax = retval;
        return true;
 }
@@ -406,7 +462,8 @@ bool dune_sys_umask(struct vm_trapframe *tf)
        //Umask always succeeds
        int retval = umask((mode_t) tf->tf_rdi);
 
-       lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false, "SUCCESS %d\n", retval);
+       lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false, "SUCCESS %d\n",
+                 retval);
        tf->tf_rax = retval;
        return true;
 }
@@ -473,7 +530,8 @@ bool dune_sys_gettid(struct vm_trapframe *tf)
        // Gettid always succeeds
        int retval = tf->tf_guest_pcoreid;
 
-       lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false, "SUCCESS %d\n", retval);
+       lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false, "SUCCESS %d\n",
+                 retval);
        tf->tf_rax = retval;
        return true;
 }
@@ -486,12 +544,13 @@ bool dune_sys_open(struct vm_trapframe *tf)
 {
        const char *file = (const char *) tf->tf_rdi;
        int flags = tf->tf_rsi;
+       int mode = tf->tf_rdx;
 
-       flags = translate_open_flags(flags);
+       flags = convert_open_flags_ltoa(flags);
        lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
                  "Trying to open \"%s\"\n", file);
 
-       int retval  = open(file, flags);
+       int retval  = open(file, flags, mode);
        int err = errno;
 
        if (retval == -1) {
@@ -525,11 +584,11 @@ bool dune_sys_openat(struct vm_trapframe *tf)
        // where we'd want to recover and return EBADF or ENOTDIR
        if (!get_absolute_path_from_fd(fd, s, &s_absolute)) {
                panic("[TID %d] %s: ERROR in getting absolute path fd was %d, suffix was %s\n",
-                     tf->tf_guest_pcoreid, dune_syscall_table[tf->tf_rax].name, fd, s);
+                     tf->tf_guest_pcoreid, dune_syscall_table[tf->tf_rax].name,
+                     fd, s);
        }
 
-       flags = translate_open_flags(flags);
-
+       flags = convert_open_flags_ltoa(flags);
        lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
                  "trying to open absolute path %s with translated flags %p\n",
                  s, flags);
@@ -570,8 +629,8 @@ bool dune_sys_readlinkat(struct vm_trapframe *tf)
 
        if (!get_absolute_path_from_fd(fd, s, &s_absolute)) {
                panic("[TID %d] %s: ERROR in getting absolute path fd was %d, suffix was %s\n",
-                     tf->tf_guest_pcoreid, dune_syscall_table[tf->tf_rax].name, fd,
-                     s);
+                     tf->tf_guest_pcoreid, dune_syscall_table[tf->tf_rax].name,
+                     fd, s);
        }
 
        lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
@@ -602,8 +661,8 @@ bool dune_sys_unlinkat(struct vm_trapframe *tf)
 
        if (!get_absolute_path_from_fd(fd, s, &s_absolute)) {
                panic("[TID %d] %s: ERROR in getting absolute path fd was %d, suffix was %s\n",
-                     tf->tf_guest_pcoreid, dune_syscall_table[tf->tf_rax].name, fd,
-                     s);
+                     tf->tf_guest_pcoreid, dune_syscall_table[tf->tf_rax].name,
+                     fd, s);
        }
 
        lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
@@ -661,43 +720,247 @@ bool dune_sys_sched_yield(struct vm_trapframe *tf)
 
 bool dune_sys_fstat(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       // fstat(2) for the guest: rdi = fd, rsi = guest's Linux stat buffer.
+       // rax gets fstat()'s result, or -errno (-EFAULT for a NULL buffer).
+       struct stat si_akaros_val;
+       int fd = tf->tf_rdi;
+       struct linux_stat_amd64 *si = (struct linux_stat_amd64*) tf->tf_rsi;
+
+       // TODO(ganshun): Check if mmaped
+       if (!si) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", EFAULT);
+               tf->tf_rax = -EFAULT;
+               return true;
+       }
+       struct stat *si_akaros = &si_akaros_val;
+
+       // Make sure we zero out the data on the stack
+       memset((void*) si_akaros, 0, sizeof(struct stat));
+       int retval = fstat(fd, si_akaros);
+       int err = errno;
+
+       if (retval == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", err);
+               tf->tf_rax = -err;
+       } else {
+               convert_stat_akaros_to_linux(si_akaros, si);
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "SUCCESS %d\n", retval);
+               tf->tf_rax = retval;
+       }
+       return true;
 }
 
 bool dune_sys_stat(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       // stat(2) for the guest: rdi = path, rsi = guest's Linux stat buffer.
+       // rax gets stat()'s result, or -errno (-EFAULT for a NULL buffer).
+       struct stat si_akaros_val;
+       const char *path = (const char*) tf->tf_rdi;
+       struct linux_stat_amd64 *si = (struct linux_stat_amd64*) tf->tf_rsi;
+
+       // TODO(ganshun): Check if mmaped
+       if (!si) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", EFAULT);
+               tf->tf_rax = -EFAULT;
+               return true;
+       }
+       struct stat *si_akaros = &si_akaros_val;
+
+       // Make sure we zero out the data on the stack
+       memset((void*) si_akaros, 0, sizeof(struct stat));
+       int retval = stat(path, si_akaros);
+       int err = errno;
+
+       if (retval == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", err);
+               tf->tf_rax = -err;
+       } else {
+               convert_stat_akaros_to_linux(si_akaros, si);
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "SUCCESS %d\n", retval);
+               tf->tf_rax = retval;
+       }
+       return true;
 }
 
 ///////////////////////////////////////////////////
 // Newly Implemented Syscalls
 ///////////////////////////////////////////////////
 
+// Dune implementation of fallocate(2).  With neither PUNCH_HOLE nor KEEP_SIZE
+// set it defers to posix_fallocate(); PUNCH_HOLE|KEEP_SIZE zero-fills
+// [offset, offset + len); other modes fail with ENOSYS.  Returns 0, or -1 with
+// errno set.  Panics if the range reaches EOF or the buffer can't be allocated.
+int dune_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+       struct stat st;
+
+       if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))) {
+               // posix_fallocate() returns the error number, not -1/errno
+               errno = posix_fallocate(fd, offset, len);
+               return errno ? -1 : 0;
+       }
+       if (offset < 0 || len <= 0) {
+               errno = EINVAL;
+               return -1;
+       }
+       if (fstat(fd, &st) == -1) {
+               errno = EBADF;
+               return -1;
+       }
+       if (offset + len >= st.st_size) {
+               // Panic: changing the size of the file is unsupported right now
+               panic("dune_fallocate: would write over the size of the file!");
+       }
+       if (S_ISFIFO(st.st_mode)) {
+               errno = ESPIPE;
+               return -1;
+       }
+       if (!(S_ISREG(st.st_mode) || S_ISDIR(st.st_mode))) {
+               errno = ENODEV;
+               return -1;
+       }
+       // TODO(ganshun): For punch hole, we just write zeros to the file for now
+       if ((mode & FALLOC_FL_PUNCH_HOLE) && (mode & FALLOC_FL_KEEP_SIZE)) {
+               const size_t buffer_size = 0x100000;
+               off_t pos;
+               ssize_t amt = 0;
+               size_t tot = 0;
+               size_t size;
+               char *buf = calloc(sizeof(char), buffer_size);
+
+               if (!buf)
+                       panic("dune_fallocate: could not allocate a buffer\n");
+               for (pos = offset; pos < offset + len; pos += amt) {
+                       size = len + offset - pos;
+                       if (size > buffer_size)
+                               size = buffer_size;
+                       // pwrite() zeros at pos without moving the fd's offset
+                       amt = pwrite(fd, buf, size, pos);
+                       if (amt == -1) {
+                               free(buf);
+                               errno = EIO;
+                               return -1;
+                       }
+                       tot += amt;
+                       fprintf(stderr, "%zu bytes written so far\n", tot);
+               }
+               free(buf);
+               return 0;
+       }
+       // Unsupported otherwise
+       errno = ENOSYS;
+       return -1;
+}
+
 // Fallocate syscall
 bool dune_sys_fallocate(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       // Args in rdi/rsi/rdx/r10; rax gets the result, or -errno on failure.
+       int fd = (int) tf->tf_rdi;
+       int mode = (int) tf->tf_rsi;
+       off_t offset = (off_t) tf->tf_rdx;
+       off_t len = (off_t) tf->tf_r10;
+       int retval = dune_fallocate(fd, mode, offset, len);
+       int err = errno;
+
+       if (retval == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", err);
+               tf->tf_rax = -err;
+       } else {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "SUCCESS %d\n", retval);
+               tf->tf_rax = retval;
+       }
+       return true;
 }
 
+// Currently unsupported: the guest always gets -ENOSYS in rax
 bool dune_sys_sched_getaffinity(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = -ENOSYS;
+       return true;
 }
 
+// We do not implement pselect; however, some applications may try to use it
+// as a portable way to sleep.  Only that form (no fds, non-NULL timeout) is
+// served, via nanosleep(); any other call is logged and returns false.
 bool dune_sys_pselect6(struct vm_trapframe *tf)
 {
-       // To Be Implemented
+       int nfds = (int) tf->tf_rdi;
+       fd_set *readfds = (fd_set *) tf->tf_rsi;
+       fd_set *writefds = (fd_set *) tf->tf_rdx;
+       fd_set *exceptfds = (fd_set *) tf->tf_r10;
+       const struct timespec *timeout = (const struct timespec *) tf->tf_r8;
+       const sigset_t *sigmask = (const sigset_t *) tf->tf_r9;
+
+       // Check if process wants to sleep (sigmask is read but never applied)
+       if (nfds == 0 && readfds == NULL && writefds == NULL &&
+           exceptfds == NULL && timeout != NULL) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "Sleeping for %ld seconds, %ld nanoseconds\n",
+                         timeout->tv_sec, timeout->tv_nsec);
+               nanosleep(timeout, NULL); // result is not checked
+               tf->tf_rax = 0;
+               return true;
+       }
+
+       lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                 "unimplemented, will now fail...\n");
        return false;
 }
 
 bool dune_sys_getrandom(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       // getrandom(2) for the guest: fills the buffer in rdi from /dev/urandom.
+       // Returns false (rax not set) only if that source cannot be opened.
+       const char *random_source = "/dev/urandom";
+       void *buf = (void*) tf->tf_rdi;
+       size_t len = (size_t) tf->tf_rsi;
+       unsigned int flags = (unsigned int) tf->tf_rdx;
+
+       if (!buf) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "buffer inaccessable\n");
+               tf->tf_rax = -EFAULT;
+               return true;
+       }
+       if (flags & GRND_RANDOM || flags & GRND_NONBLOCK) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "unsupported flags specified\n");
+               tf->tf_rax = -EINVAL;
+               return true;
+       }
+       int fd = open(random_source, O_RDONLY);
+       int err = errno;
+
+       if (fd == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR opening random source %s, errno=%d\n",
+                         random_source, err);
+               return false;
+       }
+       ssize_t retval = read(fd, buf, len);
+
+       err = errno;
+       close(fd);
+       if (retval == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR reading from random source %s, errno=%d\n",
+                         random_source, err);
+               tf->tf_rax = -err;
+       } else {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "SUCCESS %zd\n", retval);
+               tf->tf_rax = retval;
+       }
+       return true;
 }
 
 /////////////////////////////////////////////////////////////////
@@ -707,84 +970,132 @@ bool dune_sys_getrandom(struct vm_trapframe *tf)
 
 bool dune_sys_getgroups(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = 0; // stub: the guest always sees a result of 0
+       return true;
 }
 
 
 bool dune_sys_geteuid(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = 0; // stub: the guest always sees effective uid 0
+       return true;
 }
 
 bool dune_sys_getegid(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = 0; // stub: the guest always sees effective gid 0
+       return true;
 }
 
 
 bool dune_sys_getuid(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = 0; // stub: the guest always sees uid 0
+       return true;
 }
 
+
 bool dune_sys_getgid(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = 0; // stub: the guest always sees gid 0
+       return true;
 }
 
+// TODO(ganshun): implement mincore; until then every call reports -ENOMEM
 bool dune_sys_mincore(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = -ENOMEM;
+       return true;
 }
 
 bool dune_sys_rt_sigprocmask(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       int retval = sigprocmask(tf->tf_rdi, (const sigset_t*) tf->tf_rsi,
+                                (sigset_t*) tf->tf_rdx);
+       int err = errno; // captured before lemuprint() is called
+
+       if (retval == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", err);
+               tf->tf_rax = -err; // guest sees the negated errno
+       } else {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "SUCCESS %d\n", retval);
+               tf->tf_rax = retval;
+       }
+       return true;
 }
 
+// TODO(ganshun): sigaltstack needs to be implemented for the guest
 bool dune_sys_sigaltstack(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = 0; // stub: reports 0 without touching any stack state
+       return true;
 }
 
+
+// TODO(ganshun): more signal code, we need to be careful with this one,
+// we should not register guest signal handlers in akaros
 bool dune_sys_rt_sigaction(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = 0; // stub: reports 0, no handler is registered here
+       return true;
 }
 
+// TODO(ganshun): epoll is only supported for create and wait at present
 bool dune_sys_epoll_create1(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       int flags = 0;
+       // TODO(ganshun): epoll_create is not fully supported for all flags, so
+       // the guest's flags in the trapframe are ignored and 0 is always passed.
+       int retval = epoll_create1(flags);
+       int err = errno;
+
+       if (retval == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", err);
+               tf->tf_rax = -err;
+       } else {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "SUCCESS %d\n", retval);
+               tf->tf_rax = retval;
+       }
+       return true;
 }
 
 bool dune_sys_epoll_wait(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       int epfd = (int) tf->tf_rdi;
+       struct epoll_event *events = (struct epoll_event*) tf->tf_rsi;
+       int maxevents = (int) tf->tf_rdx;
+       int timeout = (int) tf->tf_r10;
+       int retval = epoll_wait(epfd, events, maxevents, timeout);
+       int err = errno; // captured before lemuprint() is called
+
+       if (retval == -1) {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, true,
+                         "ERROR %d\n", err);
+               tf->tf_rax = -err; // guest sees the negated errno
+       } else {
+               lemuprint(tf->tf_guest_pcoreid, tf->tf_rax, false,
+                         "SUCCESS %d\n", retval);
+               tf->tf_rax = retval; // epoll_wait()'s own return value
+       }
+       return true;
 }
 
 // Unimplemented
 bool dune_sys_epoll_ctl(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = -ENOSYS; // guest always sees -ENOSYS
+       return true;
 }
 
 // Unimplemented
 bool dune_sys_fstatfs(struct vm_trapframe *tf)
 {
-       // To Be Implemented
-       return false;
+       tf->tf_rax = -ENOSYS; // guest always sees -ENOSYS
+       return true;
 }
 
 // Main syscall table
@@ -1138,7 +1449,8 @@ bool init_linuxemu(void)
        }
 
        if (dlopen("liblinuxemu_extend.so", RTLD_NOW) == NULL) {
-               fprintf(stderr, "Not using any syscall extensions\n Reason: %s\n",
+               fprintf(stderr,
+                       "Not using any syscall extensions\n Reason: %s\n",
                        dlerror());
                return false;
        }
@@ -1200,8 +1512,7 @@ void lemuprint(const uint32_t tid, uint64_t syscall_number,
  * call, and in many cases we have to rearrange arguments
  * since Linux and Akaros don't share signatures, so this
  * gets tricky. */
-bool
-linuxemu(struct guest_thread *gth, struct vm_trapframe *tf)
+bool linuxemu(struct guest_thread *gth, struct vm_trapframe *tf)
 {
        bool ret = false;