vmm: Added more syscalls and helpers to linuxemu
[akaros.git] / tests / dune / dune.c
index 96ded4b..94e0f90 100644 (file)
 #include <parlib/uthread.h>
 #include <vmm/linux_bootparam.h>
 #include <getopt.h>
-
+#include <iplib/iplib.h>
 #include <vmm/sched.h>
 #include <sys/eventfd.h>
 #include <sys/uio.h>
+#include <err.h>
+#include <vmm/linuxemu.h>
+#include <vmm/vmm.h>
+#include <vmm/vthread.h>
 
-static struct virtual_machine local_vm, *vm = &local_vm;
+/* NOTE(review): main() also declares a local `gpci[1]`, which shadows this
+ * file-scope object inside main(). */
 struct vmm_gpcore_init gpci;
-static void *ram;
+/* Handler installed below as the VM's .vmcall callback. */
+bool linuxemu(struct guest_thread *gth, struct vm_trapframe *tf);
+
+
+/* Host environment; main() counts its entries and forwards them to
+ * populate_stack(). */
+extern char **environ;
+
+/* The single virtual machine used by this program.  linuxemu is registered
+ * as its vmcall handler.
+ * NOTE(review): .halt_exit presumably makes a guest hlt exit to the VMM --
+ * confirm against <vmm/vmm.h>. */
+static struct virtual_machine vm = {.halt_exit = true,
+                                    .mtx = UTH_MUTEX_INIT,
+                                    .vmcall = linuxemu};
+
+/* Size and start of the region handed to mmap_memory(); overridable with
+ * the -m and -M options respectively. */
 static unsigned long long memsize = GiB;
 static uintptr_t memstart = MinMemory;
-static uintptr_t stack;
-static unsigned long long *p512, *p1, *p2m;
 
-static int debug = 0;
+/* Debug verbosity; incremented once per -d on the command line. */
+static int dune_debug;
 
-/* load_kernel loads an ELF file as a kernel. */
-uintptr_t
-load_kernel(char *filename)
+/* Executes a single x86 `hlt` instruction.  dune_test() calls this as its
+ * final step. */
+static void hlt(void)
 {
-       Elf64_Ehdr *ehdr;
-       Elf *elf;
-       size_t phnum = 0;
-       Elf64_Phdr *hdrs;
-       int fd;
-
-       elf_version(EV_CURRENT);
-       fd = open(filename, O_RDONLY);
-       if (fd < 0) {
-               fprintf(stderr, "Can't open %s: %r\n", filename);
-               return 0;
-       }
+       __asm__ __volatile__("\thlt\n\t");
+}
+
+/* Emits the single character at *c by issuing a vmcall with
+ * rax = 1, rdi = 1, rsi = c and rdx = 1.  Always returns 0.
+ * NOTE(review): the register layout matches the Linux x86-64 convention for
+ * write(fd = 1, buf = c, count = 1); presumably serviced by the VM's vmcall
+ * handler -- confirm. */
+static int pc(char *c)
+{
+       /* The "m" constraint makes %0 the memory slot holding the pointer `c`,
+        * so the movq loads the pointer value itself into rsi. */
+       __asm__ __volatile__("movq $1, %%rax\n"
+                            "movq $1, %%rdi\n"
+                            "movq %0, %%rsi\n"
+                            "movq $1, %%rdx\n"
+                            "vmcall\n" ::
+                            "m"(c) : "rdi", "rax", "rsi", "rdx");
+       return 0;
+}
+
+/* Prints x as exactly 16 lowercase hexadecimal digits (no prefix, no
+ * newline), one character at a time through pc(). */
+static void xnum(uint64_t x)
+{
+       static char *hex = "0123456789abcdef";
 
-       elf = elf_begin(fd, ELF_C_READ, NULL);
-       if (elf == NULL) {
-               fprintf(stderr, "%s: cannot read %s ELF file.\n", __func__, filename);
-               close(fd);
-               return 0;
+       /* Walk the bytes of x from index 7 down to 0; on a little-endian
+        * machine that is most-significant byte first.  Each byte is printed
+        * as its high nibble followed by its low nibble. */
+       for (int i = 0; i < 8; i++) {
+               uint8_t v = ((uint8_t*)&x)[7 - i];
+               pc(&hex[v >> 4]);
+               pc(&hex[v & 0xf]);
        }
+}
+
+/* Prints the NUL-terminated string s one character at a time through pc().
+ * A NULL s prints a fixed warning message instead. */
+static void show(char *s)
+{
+       static char *showedoff = "NULL POINTER: That's bad.\n";
 
-       ehdr = elf64_getehdr(elf);
-       if (ehdr == NULL) {
-               fprintf(stderr, "%s: cannot get exec header of %s.\n",
-                       __func__, filename);
-               goto fail;
+       /* showedoff is non-NULL, so this recursion goes exactly one level. */
+       if (!s) {
+               show(showedoff);
+               return;
        }
-       fprintf(stderr, "%s ELF entry point is %p\n", filename,
-               (void *)ehdr->e_entry);
+       while (*s) {
+               pc(s);
+               s++;
+       }
+}
+
+/* This is a small test that runs in gr0 and tests our argument setup.
+ * This test can grow in capability as we find more broken bits in our
+ * dune-like environment. */
 
-       if (elf_getphdrnum(elf, &phnum) < 0) {
-               fprintf(stderr, "%s: cannot get program header num of %s.\n",
-                       __func__, filename);
-               goto fail;
+/* Entry point used when dune is run with -t.  `stack` is read as:
+ *   stack[0]            argc
+ *   stack[1..argc]      argv pointers, followed by a NULL
+ *   then                envp pointers, followed by a NULL
+ *   then                struct elf_aux entries, ended by one whose v[0] is 0
+ * Every item is dumped with show()/xnum(), then a vmcall with rax = 400 is
+ * issued and the function halts. */
+void dune_test(void *stack)
+{
+       show("Hello this is dune's test\n");
+
+       int argc;
+       char **argv;
+       struct elf_aux *auxv;
+
+       show("dune_test: dumping argv, env, and aux\n");
+
+       /* argc is read as a 64-bit word and narrowed to int. */
+       argc = *((uint64_t*)stack);
+       argv = &((char**)stack)[1];
+       show("argc: "); xnum(argc); show("\n");
+       show("argv: "); xnum((uint64_t)argv); show("\n");
+
+       /* argv is advanced in step with i, so argv[0] is always the current
+        * argument and argv ends up pointing at the terminating NULL. */
+       for (int i = 0; i < argc; i++, argv++) {
+               show("arg["); xnum(i); show("]:");
+               show(argv[0]);
+               show("\n");
+       }
+       // skip the null and move on to envp.
+       argv++;
+       for (int i = 0; argv[0]; i++, argv++) {
+               show("env["); xnum(i); show("]:");
+               show(argv[0]);
+               show("\n");
        }
-       fprintf(stderr, "%s has %p program headers\n", filename, phnum);
+       // skip the null and move on to auxv.
+       argv++;
+       auxv = (void *)argv;
+       for (int i = 0; auxv[i].v[0]; i++) {
+               show("auxv["); xnum(i); show("]:");
+               xnum(auxv[i].v[0]); show(":");
+               xnum(auxv[i].v[1]); show("\n");
+       }
+       show("Done dumping [argv, env, auxv]\n");
+       show("Testing syscall extensions\n");
+       /* NOTE(review): this asm writes rax but declares no clobbers; it is
+        * followed only by hlt(), but confirm that is intentional.
+        * Call number 400 is presumably a dune-specific extension -- confirm
+        * against the vmcall handler. */
+       __asm__ __volatile__("movq $400, %%rax\n"
+                            "vmcall\n" :: );
+       hlt();
+}
 
-       hdrs = elf64_getphdr(elf);
-       if (hdrs == NULL) {
-               fprintf(stderr, "%s: cannot get program headers of %s.\n",
-                       __func__, filename);
-               goto fail;
+/*
+ * Long-option table passed to getopt_long() in main() and listed by usage().
+ * Every entry has a NULL flag pointer, so getopt_long() returns the `val`
+ * character for a matched long option.  The all-zero entry ends the table.
+ *
+ * NOTE(review): "cmdline_extra" maps to 'c', but main()'s switch has no
+ * case for 'c', so --cmdline_extra currently lands in the default branch
+ * (usage).
+ */
+static struct option long_options[] = {
+       { "aux",           required_argument, NULL, 'a' },
+       { "debug",         no_argument,       NULL, 'd' },
+       { "memsize",       required_argument, NULL, 'm' },
+       { "memstart",      required_argument, NULL, 'M' },
+       { "cmdline_extra", required_argument, NULL, 'c' },
+       { "greedy",        no_argument,       NULL, 'g' },
+       { "scp",           no_argument,       NULL, 's' },
+       { "test",          no_argument,       NULL, 't' },
+       { "help",          no_argument,       NULL, 'h' },
+       { NULL, 0, NULL, 0 }
+};
+
+/*
+ * Prints the command-line synopsis followed by one line per long_options
+ * entry ("<name> or <short char>", plus " <arg>" when the option takes an
+ * argument) to stderr, then terminates the process with exit status 0.
+ * Never returns.
+ */
+static void
+usage(void)
+{
+       // Sadly, the getopt_long struct does
+       // not have a pointer to help text.
+       fprintf(stderr,
+             "Usage: dune [options] <ELF file> [<ELF file>...]\n");
+       fprintf(stderr,
+             "Or for testing: dune -t [options]\nOptions are:\n");
+       /* The "- 1" skips the all-zero terminator entry of the table. */
+       for (int i = 0;
+            i < COUNT_OF(long_options) - 1;
+            i++) {
+               struct option *l = &long_options[i];
+
+               fprintf(stderr, "%s or %c%s\n", l->name, l->val,
+                       l->has_arg ? " <arg>" : "");
        }
+       exit(0);
+}
 
-       for (int i = 0; i < phnum; i++) {
-               size_t tot;
-               Elf64_Phdr *h = &hdrs[i];
-               uintptr_t pa;
+/*
+ * Parses a comma-separated list of "type=val" pairs (the argument of -a)
+ * into a freshly malloc'ed array of struct elf_aux.
+ *
+ * auxc: out parameter; set to the number of entries in the returned array,
+ *       or to 0 whenever NULL is returned, so callers can never pair a NULL
+ *       array with a non-zero count.
+ * _s:   the option text; it is duplicated and never modified.
+ *
+ * Returns the array (caller owns it), or NULL when no pairs were found or a
+ * pair is not of the form type=val.  Exits via errx() if memory cannot be
+ * allocated.  At most 32 pairs are accepted.
+ *
+ * NOTE(review): both numbers are parsed through uint32_t, so anything wider
+ * than 32 bits is truncated -- confirm that is acceptable for aux values.
+ */
+static struct elf_aux *
+getextra(int *auxc, char *_s)
+{
+       struct elf_aux *auxv;
+       char *s = strdup(_s);
+       // icky hardcode, but realistic.
+       char *auxpairs[32];
 
-               fprintf(stderr,
-                       "%d: type 0x%lx flags 0x%lx  offset 0x%lx vaddr 0x%lx paddr 0x%lx size 0x%lx  memsz 0x%lx align 0x%lx\n",
-                       i,
-                       h->p_type,              /* Segment type */
-                       h->p_flags,             /* Segment flags */
-                       h->p_offset,            /* Segment file offset */
-                       h->p_vaddr,             /* Segment virtual address */
-                       h->p_paddr,             /* Segment physical address */
-                       h->p_filesz,            /* Segment size in file */
-                       h->p_memsz,             /* Segment size in memory */
-                       h->p_align              /* Segment alignment */);
-               if (h->p_type != PT_LOAD)
-                       continue;
-               if ((h->p_flags & (PF_R | PF_W | PF_X)) == 0)
-                       continue;
-
-               pa = h->p_paddr;
-               fprintf(stderr,
-                       "Read header %d @offset %p to %p (elf PA is %p) %d bytes:",
-                       i, h->p_offset, pa, h->p_paddr, h->p_filesz);
-               tot = 0;
-               while (tot < h->p_filesz) {
-                       int amt = pread(fd, (void *)(pa + tot), h->p_filesz - tot,
-                                       h->p_offset + tot);
-                       if (amt < 1)
-                               break;
-                       tot += amt;
-               }
-               fprintf(stderr, "read a total of %d bytes\n", tot);
-               if (tot < h->p_filesz) {
-                       fprintf(stderr, "%s: got %d bytes, wanted %d bytes\n",
-                               filename, tot, h->p_filesz);
-                       goto fail;
+       if (!s)
+               errx(1, "getextra strdup: %r");
+       *auxc = gettokens(s, auxpairs, 32, ",");
+       if (dune_debug)
+               fprintf(stderr, "Found %d extra aux pairs\n", *auxc);
+       if (*auxc < 1) {
+               *auxc = 0;
+               free(s);
+               return NULL;
+       }
+       auxv = malloc(sizeof(*auxv) * *auxc);
+       if (!auxv)
+               errx(1, "auxv malloc: %r");
+       for (int i = 0; i < *auxc; i++) {
+               char *aux[2];
+               int j;
+               uint32_t t, v;
+
+               j = gettokens(auxpairs[i], aux, 2, "=");
+               if (j < 2) {
+                       fprintf(stderr, "%s: should be in the form type=val\n",
+                               auxpairs[i]);
+                       free(auxv);
+                       /* The token pointers reference s, so release it only
+                        * after the message above has been printed. */
+                       free(s);
+                       /* Do not leave a stale count behind a NULL array. */
+                       *auxc = 0;
+                       return NULL;
                }
+               t = strtoul(aux[0], 0, 0);
+               v = strtoul(aux[1], 0, 0);
+               auxv[i].v[0] = t;
+               auxv[i].v[1] = v;
+               /* Print the 32-bit locals so the %x conversions match. */
+               if (dune_debug)
+                       fprintf(stderr, "Adding aux pair 0x%x:0x%x\n", t, v);
        }
+       /* Every token has been converted to a number; the copy is done. */
+       free(s);
+       return auxv;
 
-       close(fd);
-       elf_end(elf);
-       return ehdr->e_entry;
- fail:
-       close(fd);
-       elf_end(elf);
-       return 0;
+}
+
+/*
+ * Merges two aux arrays into one heap block.
+ *
+ * `extra` (extrac entries, may be NULL) is grown with realloc() to hold
+ * basec + extrac entries; its contents stay at indices [0, extrac) and the
+ * basec entries of `base` are copied to [extrac, extrac + basec).  No
+ * terminating entry is appended.
+ *
+ * Returns the merged array, which replaces `extra`, or NULL if realloc()
+ * fails (in which case `extra` is left untouched).
+ */
+static struct elf_aux *
+buildaux(struct elf_aux *base, int basec, struct elf_aux *extra, int extrac)
+{
+       struct elf_aux *merged;
+
+       merged = realloc(extra, (basec + extrac) * sizeof(*merged));
+       if (merged == NULL)
+               return NULL;
+
+       if (dune_debug != 0)
+               fprintf(stderr, "buildaux: consolidating %d aux and %d extra\n",
+                       basec, extrac);
+       /* TODO: check for dups. */
+       if (basec != 0)
+               memmove(merged + extrac, base, basec * sizeof(*base));
+       return merged;
 }
 
 int main(int argc, char **argv)
 {
-       int vmmflags = VMM_VMCALL_PRINTF;
+       void *tos;
+       int envc, auxc, extrac = 0;
+       struct elf_aux *auxv, *extra = NULL;
        uint64_t entry = 0;
-       int ret;
-       struct vm_trapframe *vm_tf;
+       struct vthread *vth;
+       struct vmm_gpcore_init gpci[1];
        int c;
+       int test = 0;
        int option_index;
-       static struct option long_options[] = {
-               {"debug",         no_argument,       0, 'd'},
-               {"vmmflags",      required_argument, 0, 'v'},
-               {"memsize",       required_argument, 0, 'm'},
-               {"memstart",      required_argument, 0, 'M'},
-               {"stack",         required_argument, 0, 'S'},
-               {"cmdline_extra", required_argument, 0, 'c'},
-               {"greedy",        no_argument,       0, 'g'},
-               {"scp",           no_argument,       0, 's'},
-               {"help",          no_argument,       0, 'h'},
-               {0, 0, 0, 0}
-       };
+       int ac = argc;
+       char **av = argv;
 
        fprintf(stderr, "%p %p %p %p\n", PGSIZE, PGSHIFT, PML1_SHIFT,
-                       PML1_PTE_REACH);
+               PML1_PTE_REACH);
 
        if ((uintptr_t)__procinfo.program_end >= MinMemory) {
                fprintf(stderr,
@@ -165,52 +249,48 @@ int main(int argc, char **argv)
                exit(1);
        }
 
-       while ((c = getopt_long(argc, argv, "dv:m:M:S:gsh", long_options,
+       while ((c = getopt_long(argc, argv, "a:dv:m:M:gsth", long_options,
                                &option_index)) != -1) {
                switch (c) {
-                       case 'd':
-                               debug++;
-                               break;
-                       case 'v':
-                               vmmflags = strtoull(optarg, 0, 0);
-                               break;
-                       case 'm':
-                               memsize = strtoull(optarg, 0, 0);
-                               break;
-                       case 'M':
-                               memstart = strtoull(optarg, 0, 0);
-                               break;
-                       case 'S':
-                               stack = strtoull(optarg, 0, 0);
-                               break;
-                       case 'g':       /* greedy */
-                               parlib_never_yield = TRUE;
-                               break;
-                       case 's':       /* scp */
-                               parlib_wants_to_be_mcp = FALSE;
-                               break;
-                       case 'h':
-                       default:
-                               // Sadly, the getopt_long struct does
-                               // not have a pointer to help text.
-                               for (int i = 0;
-                                   i < sizeof(long_options)/sizeof(long_options[0]) - 1;
-                                   i++) {
-                                       struct option *l = &long_options[i];
-
-                                       fprintf(stderr, "%s or %c%s\n", l->name, l->val,
-                                               l->has_arg ? " <arg>" : "");
-                               }
-                               exit(0);
+               case 'a':
+                       extra = getextra(&extrac, optarg);
+                       if (dune_debug)
+                               fprintf(stderr, "Added %d aux items\n", extrac);
+                       break;
+               case 'd':
+                       fprintf(stderr, "SET DEBUG\n");
+                       dune_debug++;
+                       break;
+               case 'm':
+                       memsize = strtoull(optarg, 0, 0);
+                       break;
+               case 'M':
+                       memstart = strtoull(optarg, 0, 0);
+                       break;
+               case 'g':       /* greedy */
+                       parlib_never_yield = TRUE;
+                       break;
+               case 's':       /* scp */
+                       parlib_wants_to_be_mcp = FALSE;
+                       break;
+               case 't':
+                       test = 1;
+                       break;
+               case 'h':
+               default:
+                       usage();
+                       break;
                }
        }
        argc -= optind;
        argv += optind;
-       if (argc < 1) {
-               fprintf(stderr, "Usage: %s vmimage [-n (no vmcall printf)]\n", argv[0]);
-               exit(1);
+       if ((!test) && (argc < 1)) {
+               usage();
        }
 
+       init_lemu_logging(dune_debug);
+       init_linuxemu();
+
        if ((uintptr_t)(memstart + memsize) >= (uintptr_t)BRK_START) {
                fprintf(stderr,
                        "memstart 0x%lx memsize 0x%lx -> 0x%lx is too large; overlaps BRK_START at %p\n",
@@ -218,77 +298,51 @@ int main(int argc, char **argv)
                exit(1);
        }
 
-       ram = mmap((void *)memstart, memsize,
-                  PROT_READ | PROT_WRITE | PROT_EXEC,
-                  MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
-       if (ram != (void *)memstart) {
-               fprintf(stderr, "Could not mmap 0x%lx bytes at 0x%lx\n",
-                       memsize, memstart);
-               exit(1);
-       }
+       mmap_memory(&vm, memstart, memsize);
 
-       entry = load_kernel(argv[0]);
-       if (entry == 0) {
-               fprintf(stderr, "Unable to load kernel %s\n", argv[0]);
-               exit(1);
-       }
+       if (dune_debug)
+               fprintf(stderr, "mmap guest physical memory at %p for 0x%lx bytes\n",
+                       memstart, memsize);
 
-       vm->nr_gpcs = 1;
-       vm->gpcis = &gpci;
-       ret = vmm_init(vm, vmmflags);
-       if (ret) {
-               fprintf(stderr, "vmm_init failed: %r\n");
-               exit(1);
-       }
+       // TODO: find out why we can't use memstart + memsize as TOS.
+       tos = (void *)(memstart + 0x800000);
 
-       /* Allocate 3 pages for page table pages: a page of 512 GiB
-        * PTEs with only one entry filled to point to a page of 1 GiB
-        * PTEs; a page of 1 GiB PTEs with only one entry filled to
-        * point to a page of 2 MiB PTEs; and a page of 2 MiB PTEs,
-        * all of which may be filled. For now, we don't handle
-        * starting addresses not aligned on 512 GiB boundaries or
-        * sizes > GiB */
-       ret = posix_memalign((void **)&p512, PGSIZE, 3 * PGSIZE);
-       if (ret) {
-               perror("ptp alloc");
+       for (envc = 0; environ[envc]; envc++)
+               ;
+       auxv = (struct elf_aux *)&environ[envc+1];
+       for (auxc = 0; auxv[auxc].v[0]; auxc++)
+               ;
+       auxv = buildaux(auxv, auxc, extra, extrac);
+       if (!auxv) {
+               fprintf(stderr, "Can't build auxv: %r");
                exit(1);
        }
+       auxc = auxc + extrac;
 
-       /* Set up a 1:1 ("identity") page mapping from guest virtual
-        * to guest physical using the (host virtual)
-        * `kerneladdress`. This mapping may be used for only a short
-        * time, until the guest sets up its own page tables. Be aware
-        * that the values stored in the table are physical addresses.
-        * This is subtle and mistakes are easily disguised due to the
-        * identity mapping, so take care when manipulating these
-        * mappings. */
-       p1 = &p512[NPTENTRIES];
-       p2m = &p512[2 * NPTENTRIES];
-
-       fprintf(stderr, "Map %p for %zu bytes\n", memstart, memsize);
-       /* TODO: fix this nested loop so it's correct for more than
-        * one GiB. */
-       for(uintptr_t p4 = memstart; p4 < memstart + memsize;
-           p4 += PML4_PTE_REACH) {
-               p512[PML4(p4)] = (uint64_t)p1 | PTE_KERN_RW;
-               for (uintptr_t p3 = p4; p3 < memstart + memsize;
-                    p3 += PML3_PTE_REACH) {
-                       p1[PML3(p3)] = (uint64_t)p2m | PTE_KERN_RW;
-                       for (uintptr_t p2 = p3; p2 < memstart + memsize; p2 += PML2_PTE_REACH) {
-                               p2m[PML2(p2)] =
-                                       (uint64_t)(p2) | PTE_KERN_RW | PTE_PS;
-                       }
+       if (!test) {
+               entry = load_elf(argv[0], MinMemory, NULL, NULL);
+               if (entry == 0) {
+                       fprintf(stderr, "Unable to load kernel %s\n", argv[0]);
+                       exit(1);
                }
+       } else {
+               fprintf(stderr, "Running dune test\n");
+               entry = (uintptr_t) dune_test;
        }
+       if (dune_debug)
+               fprintf(stderr, "Test: Populate stack at %p\n", tos);
+       tos = populate_stack(tos, ac, av, envc, environ, auxc, auxv);
+       if (dune_debug)
+               fprintf(stderr, "populated stack at %p; argc %d, envc %d, auxc %d\n",
+                       tos, ac, envc, auxc);
 
-       fprintf(stderr, "p512 %p p512[0] is 0x%lx p1 %p p1[0] is 0x%x\n", p512, p512[0], p1, p1[0]);
+       if (dune_debug)
+               fprintf(stderr, "stack is %p\n", tos);
 
-       vm_tf = gth_to_vmtf(vm->gths[0]);
-       vm_tf->tf_cr3 = (uint64_t) p512;
-       vm_tf->tf_rip = entry;
-       vm_tf->tf_rsp = stack;
-       vm_tf->tf_rsi = (uint64_t) 0;
-       start_guest_thread(vm->gths[0]);
+       gpci_init(gpci);
+       vth = vthread_alloc(&vm, gpci);
+       vthread_init_ctx(vth, entry, (uintptr_t)tos, (uintptr_t)tos);
+       vthread_run(vth);
 
        uthread_sleep_forever();
        return 0;